Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * compression.c
4 : *
5 : * Shared code for compression methods and specifications.
6 : *
7 : * A compression specification specifies the parameters that should be used
8 : * when performing compression with a specific algorithm. The simplest
9 : * possible compression specification is an integer, which sets the
10 : * compression level.
11 : *
12 : * Otherwise, a compression specification is a comma-separated list of items,
13 : * each having the form keyword or keyword=value.
14 : *
15 : * Currently, the supported keywords are "level", "long", and "workers".
16 : *
17 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
18 : *
19 : * IDENTIFICATION
20 : * src/common/compression.c
21 : *-------------------------------------------------------------------------
22 : */
23 :
#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#include <errno.h>
#include <limits.h>

#ifdef USE_ZSTD
#include <zstd.h>
#endif
#ifdef HAVE_LIBZ
#include <zlib.h>
#endif

#include "common/compression.h"
38 :
39 : static int expect_integer_value(char *keyword, char *value,
40 : pg_compress_specification *result);
41 : static bool expect_boolean_value(char *keyword, char *value,
42 : pg_compress_specification *result);
43 :
44 : /*
45 : * Look up a compression algorithm by name. Returns true and sets *algorithm
46 : * if the name is recognized. Otherwise returns false.
47 : */
48 : bool
362 michael 49 GIC 309 : parse_compress_algorithm(char *name, pg_compress_algorithm *algorithm)
50 : {
382 rhaas 51 CBC 309 : if (strcmp(name, "none") == 0)
362 michael 52 GIC 256 : *algorithm = PG_COMPRESSION_NONE;
382 rhaas 53 CBC 53 : else if (strcmp(name, "gzip") == 0)
362 michael 54 32 : *algorithm = PG_COMPRESSION_GZIP;
382 rhaas 55 21 : else if (strcmp(name, "lz4") == 0)
362 michael 56 7 : *algorithm = PG_COMPRESSION_LZ4;
382 rhaas 57 14 : else if (strcmp(name, "zstd") == 0)
362 michael 58 10 : *algorithm = PG_COMPRESSION_ZSTD;
382 rhaas 59 ECB : else
382 rhaas 60 CBC 4 : return false;
382 rhaas 61 GIC 305 : return true;
382 rhaas 62 ECB : }
63 :
64 : /*
65 : * Get the human-readable name corresponding to a particular compression
66 : * algorithm.
67 : */
68 : const char *
362 michael 69 GIC 15 : get_compress_algorithm_name(pg_compress_algorithm algorithm)
70 : {
382 rhaas 71 CBC 15 : switch (algorithm)
72 : {
362 michael 73 3 : case PG_COMPRESSION_NONE:
382 rhaas 74 GIC 3 : return "none";
362 michael 75 CBC 10 : case PG_COMPRESSION_GZIP:
382 rhaas 76 10 : return "gzip";
362 michael 77 1 : case PG_COMPRESSION_LZ4:
382 rhaas 78 1 : return "lz4";
362 michael 79 1 : case PG_COMPRESSION_ZSTD:
382 rhaas 80 1 : return "zstd";
382 rhaas 81 ECB : /* no default, to provoke compiler warnings if values are added */
82 : }
382 rhaas 83 UIC 0 : Assert(false);
84 : return "???"; /* placate compiler */
382 rhaas 85 EUB : }
86 :
87 : /*
88 : * Parse a compression specification for a specified algorithm.
89 : *
90 : * See the file header comments for a brief description of what a compression
91 : * specification is expected to look like.
92 : *
93 : * On return, all fields of the result object will be initialized.
94 : * In particular, result->parse_error will be NULL if no errors occurred
95 : * during parsing, and will otherwise contain an appropriate error message.
96 : * The caller may free this error message string using pfree, if desired.
97 : * Note, however, even if there's no parse error, the string might not make
98 : * sense: e.g. for gzip, level=12 is not sensible, but it does parse OK.
99 : *
100 : * The compression level is assigned by default if not directly specified
101 : * by the specification.
102 : *
103 : * Use validate_compress_specification() to find out whether a compression
104 : * specification is semantically sensible.
105 : */
106 : void
362 michael 107 GIC 319 : parse_compress_specification(pg_compress_algorithm algorithm, char *specification,
108 : pg_compress_specification *result)
382 rhaas 109 ECB : {
110 : int bare_level;
111 : char *bare_level_endp;
112 :
113 : /* Initial setup of result object. */
382 rhaas 114 GIC 319 : result->algorithm = algorithm;
115 319 : result->options = 0;
382 rhaas 116 CBC 319 : result->parse_error = NULL;
382 rhaas 117 ECB :
207 michael 118 : /*
119 : * Assign a default level depending on the compression method. This may
120 : * be enforced later.
121 : */
207 michael 122 GIC 319 : switch (result->algorithm)
123 : {
207 michael 124 CBC 254 : case PG_COMPRESSION_NONE:
207 michael 125 GIC 254 : result->level = 0;
207 michael 126 CBC 254 : break;
127 7 : case PG_COMPRESSION_LZ4:
207 michael 128 ECB : #ifdef USE_LZ4
207 michael 129 CBC 7 : result->level = 0; /* fast compression mode */
130 : #else
207 michael 131 ECB : result->parse_error =
132 : psprintf(_("this build does not support compression with %s"),
133 : "LZ4");
134 : #endif
207 michael 135 GIC 7 : break;
136 10 : case PG_COMPRESSION_ZSTD:
207 michael 137 ECB : #ifdef USE_ZSTD
207 michael 138 CBC 10 : result->level = ZSTD_CLEVEL_DEFAULT;
139 : #else
207 michael 140 ECB : result->parse_error =
141 : psprintf(_("this build does not support compression with %s"),
142 : "ZSTD");
143 : #endif
207 michael 144 GIC 10 : break;
145 48 : case PG_COMPRESSION_GZIP:
207 michael 146 ECB : #ifdef HAVE_LIBZ
207 michael 147 CBC 48 : result->level = Z_DEFAULT_COMPRESSION;
148 : #else
207 michael 149 ECB : result->parse_error =
150 : psprintf(_("this build does not support compression with %s"),
151 : "gzip");
152 : #endif
207 michael 153 GIC 48 : break;
154 : }
207 michael 155 ECB :
156 : /* If there is no specification, we're done already. */
382 rhaas 157 GIC 319 : if (specification == NULL)
158 295 : return;
382 rhaas 159 ECB :
160 : /* As a special case, the specification can be a bare integer. */
382 rhaas 161 GIC 44 : bare_level = strtol(specification, &bare_level_endp, 10);
162 44 : if (specification != bare_level_endp && *bare_level_endp == '\0')
382 rhaas 163 ECB : {
382 rhaas 164 CBC 20 : result->level = bare_level;
382 rhaas 165 GIC 20 : return;
382 rhaas 166 ECB : }
167 :
168 : /* Look for comma-separated keyword or keyword=value entries. */
169 : while (1)
382 rhaas 170 GIC 4 : {
171 : char *kwstart;
382 rhaas 172 ECB : char *kwend;
173 : char *vstart;
174 : char *vend;
175 : int kwlen;
176 : int vlen;
177 : bool has_value;
178 : char *keyword;
179 : char *value;
180 :
181 : /* Figure start, end, and length of next keyword and any value. */
382 rhaas 182 GIC 28 : kwstart = kwend = specification;
183 152 : while (*kwend != '\0' && *kwend != ',' && *kwend != '=')
382 rhaas 184 CBC 124 : ++kwend;
185 28 : kwlen = kwend - kwstart;
186 28 : if (*kwend != '=')
382 rhaas 187 ECB : {
382 rhaas 188 CBC 14 : vstart = vend = NULL;
382 rhaas 189 GIC 14 : vlen = 0;
382 rhaas 190 CBC 14 : has_value = false;
382 rhaas 191 ECB : }
192 : else
193 : {
382 rhaas 194 GIC 14 : vstart = vend = kwend + 1;
195 36 : while (*vend != '\0' && *vend != ',')
382 rhaas 196 CBC 22 : ++vend;
197 14 : vlen = vend - vstart;
198 14 : has_value = true;
382 rhaas 199 ECB : }
200 :
201 : /* Reject empty keyword. */
382 rhaas 202 GIC 28 : if (kwlen == 0)
203 : {
382 rhaas 204 CBC 4 : result->parse_error =
382 rhaas 205 GIC 4 : pstrdup(_("found empty string where a compression option was expected"));
382 rhaas 206 CBC 4 : break;
382 rhaas 207 ECB : }
208 :
209 : /* Extract keyword and value as separate C strings. */
382 rhaas 210 GIC 24 : keyword = palloc(kwlen + 1);
211 24 : memcpy(keyword, kwstart, kwlen);
382 rhaas 212 CBC 24 : keyword[kwlen] = '\0';
213 24 : if (!has_value)
214 10 : value = NULL;
382 rhaas 215 ECB : else
216 : {
382 rhaas 217 GIC 14 : value = palloc(vlen + 1);
218 14 : memcpy(value, vstart, vlen);
382 rhaas 219 CBC 14 : value[vlen] = '\0';
382 rhaas 220 ECB : }
221 :
222 : /* Handle whatever keyword we found. */
382 rhaas 223 GIC 24 : if (strcmp(keyword, "level") == 0)
224 : {
382 rhaas 225 CBC 12 : result->level = expect_integer_value(keyword, value, result);
226 :
207 michael 227 ECB : /*
228 : * No need to set a flag in "options", there is a default level
229 : * set at least thanks to the logic above.
230 : */
231 : }
375 rhaas 232 GIC 12 : else if (strcmp(keyword, "workers") == 0)
233 : {
375 rhaas 234 CBC 4 : result->workers = expect_integer_value(keyword, value, result);
362 michael 235 GIC 4 : result->options |= PG_COMPRESSION_OPTION_WORKERS;
375 rhaas 236 ECB : }
3 tomas.vondra 237 GNC 8 : else if (strcmp(keyword, "long") == 0)
238 : {
239 5 : result->long_distance = expect_boolean_value(keyword, value, result);
240 5 : result->options |= PG_COMPRESSION_OPTION_LONG_DISTANCE;
241 : }
382 rhaas 242 ECB : else
382 rhaas 243 GIC 3 : result->parse_error =
197 peter 244 CBC 3 : psprintf(_("unrecognized compression option: \"%s\""), keyword);
245 :
382 rhaas 246 ECB : /* Release memory, just to be tidy. */
382 rhaas 247 CBC 24 : pfree(keyword);
382 rhaas 248 GIC 24 : if (value != NULL)
249 14 : pfree(value);
382 rhaas 250 ECB :
375 251 : /*
252 : * If we got an error or have reached the end of the string, stop.
253 : *
254 : * If there is no value, then the end of the keyword might have been
255 : * the end of the string. If there is a value, then the end of the
256 : * keyword cannot have been the end of the string, but the end of the
257 : * value might have been.
258 : */
375 rhaas 259 GIC 24 : if (result->parse_error != NULL ||
260 15 : (vend == NULL ? *kwend == '\0' : *vend == '\0'))
261 : break;
262 :
263 : /* Advance to next entry and loop around. */
382 264 4 : specification = vend == NULL ? kwend + 1 : vend + 1;
265 : }
382 rhaas 266 ECB : }
267 :
268 : /*
269 : * Parse 'value' as an integer and return the result.
270 : *
271 : * If parsing fails, set result->parse_error to an appropriate message
272 : * and return -1.
273 : */
274 : static int
362 michael 275 GIC 16 : expect_integer_value(char *keyword, char *value, pg_compress_specification *result)
276 : {
277 : int ivalue;
278 : char *ivalue_endp;
279 :
382 rhaas 280 16 : if (value == NULL)
281 : {
382 rhaas 282 CBC 2 : result->parse_error =
382 rhaas 283 GIC 2 : psprintf(_("compression option \"%s\" requires a value"),
284 : keyword);
285 2 : return -1;
286 : }
382 rhaas 287 ECB :
382 rhaas 288 GIC 14 : ivalue = strtol(value, &ivalue_endp, 10);
382 rhaas 289 CBC 14 : if (ivalue_endp == value || *ivalue_endp != '\0')
382 rhaas 290 ECB : {
382 rhaas 291 GIC 4 : result->parse_error =
382 rhaas 292 CBC 4 : psprintf(_("value for compression option \"%s\" must be an integer"),
293 : keyword);
382 rhaas 294 GIC 4 : return -1;
382 rhaas 295 ECB : }
382 rhaas 296 CBC 10 : return ivalue;
297 : }
382 rhaas 298 ECB :
299 : /*
300 : * Parse 'value' as a boolean and return the result.
301 : *
302 : * If parsing fails, set result->parse_error to an appropriate message
303 : * and return -1. The caller must check result->parse_error to determine if
304 : * the call was successful.
305 : *
306 : * Valid values are: yes, no, on, off, 1, 0.
307 : *
308 : * Inspired by ParseVariableBool().
309 : */
310 : static bool
3 tomas.vondra 311 GNC 5 : expect_boolean_value(char *keyword, char *value, pg_compress_specification *result)
312 : {
313 5 : if (value == NULL)
314 5 : return true;
315 :
3 tomas.vondra 316 UNC 0 : if (pg_strcasecmp(value, "yes") == 0)
317 0 : return true;
318 0 : if (pg_strcasecmp(value, "on") == 0)
319 0 : return true;
320 0 : if (pg_strcasecmp(value, "1") == 0)
321 0 : return true;
322 :
323 0 : if (pg_strcasecmp(value, "no") == 0)
324 0 : return false;
325 0 : if (pg_strcasecmp(value, "off") == 0)
326 0 : return false;
327 0 : if (pg_strcasecmp(value, "0") == 0)
328 0 : return false;
329 :
330 0 : result->parse_error =
331 0 : psprintf(_("value for compression option \"%s\" must be a boolean"),
332 : keyword);
333 0 : return false;
334 : }
335 :
382 rhaas 336 ECB : /*
337 : * Returns NULL if the compression specification string was syntactically
338 : * valid and semantically sensible. Otherwise, returns an error message.
339 : *
340 : * Does not test whether this build of PostgreSQL supports the requested
341 : * compression method.
342 : */
343 : char *
362 michael 344 GIC 303 : validate_compress_specification(pg_compress_specification *spec)
345 : {
207 346 303 : int min_level = 1;
347 303 : int max_level = 1;
348 303 : int default_level = 0;
349 :
350 : /* If it didn't even parse OK, it's definitely no good. */
382 rhaas 351 303 : if (spec->parse_error != NULL)
352 13 : return spec->parse_error;
353 :
354 : /*
207 michael 355 ECB : * Check that the algorithm expects a compression level and it is within
356 : * the legal range for the algorithm.
382 rhaas 357 : */
207 michael 358 CBC 290 : switch (spec->algorithm)
359 : {
207 michael 360 GBC 19 : case PG_COMPRESSION_GZIP:
382 rhaas 361 19 : max_level = 9;
207 michael 362 EUB : #ifdef HAVE_LIBZ
207 michael 363 GBC 19 : default_level = Z_DEFAULT_COMPRESSION;
207 michael 364 EUB : #endif
207 michael 365 GBC 19 : break;
207 michael 366 GIC 7 : case PG_COMPRESSION_LZ4:
382 rhaas 367 GBC 7 : max_level = 12;
207 michael 368 7 : default_level = 0; /* fast mode */
369 7 : break;
370 10 : case PG_COMPRESSION_ZSTD:
207 michael 371 EUB : #ifdef USE_ZSTD
199 michael 372 GBC 10 : max_level = ZSTD_maxCLevel();
199 michael 373 GIC 10 : min_level = ZSTD_minCLevel();
207 michael 374 GBC 10 : default_level = ZSTD_CLEVEL_DEFAULT;
207 michael 375 EUB : #endif
207 michael 376 GIC 10 : break;
207 michael 377 GBC 254 : case PG_COMPRESSION_NONE:
207 michael 378 GIC 254 : if (spec->level != 0)
379 3 : return psprintf(_("compression algorithm \"%s\" does not accept a compression level"),
380 : get_compress_algorithm_name(spec->algorithm));
381 251 : break;
382 : }
383 :
384 287 : if ((spec->level < min_level || spec->level > max_level) &&
385 264 : spec->level != default_level)
386 3 : return psprintf(_("compression algorithm \"%s\" expects a compression level between %d and %d (default at %d)"),
387 : get_compress_algorithm_name(spec->algorithm),
207 michael 388 ECB : min_level, max_level, default_level);
389 :
375 rhaas 390 : /*
391 : * Of the compression algorithms that we currently support, only zstd
392 : * allows parallel workers.
393 : */
362 michael 394 GIC 284 : if ((spec->options & PG_COMPRESSION_OPTION_WORKERS) != 0 &&
362 michael 395 CBC 4 : (spec->algorithm != PG_COMPRESSION_ZSTD))
375 rhaas 396 ECB : {
375 rhaas 397 GIC 2 : return psprintf(_("compression algorithm \"%s\" does not accept a worker count"),
398 : get_compress_algorithm_name(spec->algorithm));
399 : }
400 :
401 : /*
402 : * Of the compression algorithms that we currently support, only zstd
403 : * supports long-distance mode.
404 : */
3 tomas.vondra 405 GNC 282 : if ((spec->options & PG_COMPRESSION_OPTION_LONG_DISTANCE) != 0 &&
406 5 : (spec->algorithm != PG_COMPRESSION_ZSTD))
407 : {
408 2 : return psprintf(_("compression algorithm \"%s\" does not support long-distance mode"),
409 : get_compress_algorithm_name(spec->algorithm));
410 : }
411 :
382 rhaas 412 GIC 280 : return NULL;
382 rhaas 413 ECB : }
414 :
415 : #ifdef FRONTEND
416 :
/*
 * Split the value of a command-line option, commonly -Z/--compress, into an
 * algorithm name and an optional detail string.
 *
 * The expected shape is METHOD[:DETAIL].  This function only separates the
 * two parts; DETAIL is meant to be fed to parse_compress_specification()
 * later on.  Both output strings are freshly allocated; *detail is set to
 * NULL when there is no detail.
 *
 * For backward compatibility, an option that is entirely an integer picks
 * the method implicitly: zero means "none" with no detail, anything else
 * means "gzip" with the whole option string as detail.  An empty option
 * string parses as zero here and therefore also yields "none".
 */
void
parse_compress_options(const char *option, char **algorithm, char **detail)
{
	char	   *num_end;
	char	   *colon;
	char	   *name;
	long		level;
	size_t		namelen;

	/* Bare integer?  Then the method is implied by its value. */
	level = strtol(option, &num_end, 10);
	if (*num_end == '\0')
	{
		if (level != 0)
		{
			*algorithm = pstrdup("gzip");
			*detail = pstrdup(option);
			return;
		}

		*algorithm = pstrdup("none");
		*detail = NULL;
		return;
	}

	/* Without a colon, the whole option is the algorithm name. */
	colon = strchr(option, ':');
	if (colon == NULL)
	{
		*algorithm = pstrdup(option);
		*detail = NULL;
		return;
	}

	/* Text before the colon names the algorithm, the rest is the detail. */
	namelen = colon - option;
	name = palloc(namelen + 1);
	memcpy(name, option, namelen);
	name[namelen] = '\0';

	*algorithm = name;
	*detail = pstrdup(colon + 1);
}
476 : #endif /* FRONTEND */
|