Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * parse_manifest.c
4 : * Parse a backup manifest in JSON format.
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : * src/bin/pg_verifybackup/parse_manifest.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 :
14 : #include "postgres_fe.h"
15 :
16 : #include "parse_manifest.h"
17 : #include "common/jsonapi.h"
18 :
/*
 * Semantic states for JSON manifest parsing.
 *
 * Each value names what the parser callbacks expect to encounter next; the
 * callbacks below advance the state as the document is consumed and report
 * a parse failure when something else shows up.
 */
typedef enum
{
	JM_EXPECT_TOPLEVEL_START,	/* start of the toplevel object */
	JM_EXPECT_TOPLEVEL_END,		/* end of the toplevel object */
	JM_EXPECT_TOPLEVEL_FIELD,	/* a key of the toplevel object */
	JM_EXPECT_VERSION_VALUE,	/* value of the version indicator */
	JM_EXPECT_FILES_START,		/* start of the "Files" array */
	JM_EXPECT_FILES_NEXT,		/* next file object, or end of the array */
	JM_EXPECT_THIS_FILE_FIELD,	/* a key of the current file object, or its
								 * end */
	JM_EXPECT_THIS_FILE_VALUE,	/* value for the file key just seen */
	JM_EXPECT_WAL_RANGES_START, /* start of the "WAL-Ranges" array */
	JM_EXPECT_WAL_RANGES_NEXT,	/* next WAL range object, or end of the array */
	JM_EXPECT_THIS_WAL_RANGE_FIELD, /* a key of the current WAL range object,
									 * or its end */
	JM_EXPECT_THIS_WAL_RANGE_VALUE, /* value for the WAL range key just seen */
	JM_EXPECT_MANIFEST_CHECKSUM_VALUE,	/* value of "Manifest-Checksum" */
	JM_EXPECT_EOF				/* toplevel object closed; nothing more is
								 * expected */
} JsonManifestSemanticState;
39 :
/*
 * Possible fields for one file as described by the manifest.
 *
 * json_manifest_object_field_start() records which key was just seen so that
 * json_manifest_scalar() knows where to store the value that follows.
 */
typedef enum
{
	JMFF_PATH,					/* "Path" */
	JMFF_ENCODED_PATH,			/* "Encoded-Path" (hex-encoded path name) */
	JMFF_SIZE,					/* "Size" */
	JMFF_LAST_MODIFIED,			/* "Last-Modified" (value is discarded) */
	JMFF_CHECKSUM_ALGORITHM,	/* "Checksum-Algorithm" */
	JMFF_CHECKSUM				/* "Checksum" (hex-encoded) */
} JsonManifestFileField;
52 :
/*
 * Possible fields for one WAL range as described by the manifest.
 *
 * json_manifest_object_field_start() records which key was just seen so that
 * json_manifest_scalar() knows where to store the value that follows.
 */
typedef enum
{
	JMWRF_TIMELINE,				/* "Timeline" */
	JMWRF_START_LSN,			/* "Start-LSN" */
	JMWRF_END_LSN				/* "End-LSN" */
} JsonManifestWALRangeField;
62 :
63 : /*
64 : * Internal state used while decoding the JSON-format backup manifest.
65 : */
66 : typedef struct
67 : {
68 : JsonManifestParseContext *context;
69 : JsonManifestSemanticState state;
70 :
71 : /* These fields are used for parsing objects in the list of files. */
72 : JsonManifestFileField file_field;
73 : char *pathname;
74 : char *encoded_pathname;
75 : char *size;
76 : char *algorithm;
77 : pg_checksum_type checksum_algorithm;
78 : char *checksum;
79 :
80 : /* These fields are used for parsing objects in the list of WAL ranges. */
81 : JsonManifestWALRangeField wal_range_field;
82 : char *timeline;
83 : char *start_lsn;
84 : char *end_lsn;
85 :
86 : /* Miscellaneous other stuff. */
87 : bool saw_version_field;
88 : char *manifest_checksum;
89 : } JsonManifestParseState;
90 :
91 : static JsonParseErrorType json_manifest_object_start(void *state);
92 : static JsonParseErrorType json_manifest_object_end(void *state);
93 : static JsonParseErrorType json_manifest_array_start(void *state);
94 : static JsonParseErrorType json_manifest_array_end(void *state);
95 : static JsonParseErrorType json_manifest_object_field_start(void *state, char *fname,
96 : bool isnull);
97 : static JsonParseErrorType json_manifest_scalar(void *state, char *token,
98 : JsonTokenType tokentype);
99 : static void json_manifest_finalize_file(JsonManifestParseState *parse);
100 : static void json_manifest_finalize_wal_range(JsonManifestParseState *parse);
101 : static void verify_manifest_checksum(JsonManifestParseState *parse,
102 : char *buffer, size_t size);
103 : static void json_manifest_parse_failure(JsonManifestParseContext *context,
104 : char *msg);
105 :
106 : static int hexdecode_char(char c);
107 : static bool hexdecode_string(uint8 *result, char *input, int nbytes);
108 : static bool parse_xlogrecptr(XLogRecPtr *result, char *input);
109 :
110 : /*
111 : * Main entrypoint to parse a JSON-format backup manifest.
112 : *
113 : * Caller should set up the parsing context and then invoke this function.
114 : * For each file whose information is extracted from the manifest,
115 : * context->perfile_cb is invoked. In case of trouble, context->error_cb is
116 : * invoked and is expected not to return.
117 : */
118 : void
1101 rhaas 119 CBC 90 : json_parse_manifest(JsonManifestParseContext *context, char *buffer,
120 : size_t size)
121 : {
122 : JsonLexContext *lex;
123 : JsonParseErrorType json_error;
124 : JsonSemAction sem;
125 : JsonManifestParseState parse;
126 :
127 : /* Set up our private parsing context. */
128 90 : parse.context = context;
129 90 : parse.state = JM_EXPECT_TOPLEVEL_START;
130 90 : parse.saw_version_field = false;
131 :
132 : /* Create a JSON lexing context. */
133 90 : lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true);
134 :
135 : /* Set up semantic actions. */
136 90 : sem.semstate = &parse;
137 90 : sem.object_start = json_manifest_object_start;
138 90 : sem.object_end = json_manifest_object_end;
139 90 : sem.array_start = json_manifest_array_start;
140 90 : sem.array_end = json_manifest_array_end;
141 90 : sem.object_field_start = json_manifest_object_field_start;
142 90 : sem.object_field_end = NULL;
143 90 : sem.array_element_start = NULL;
144 90 : sem.array_element_end = NULL;
145 90 : sem.scalar = json_manifest_scalar;
146 :
147 : /* Run the actual JSON parser. */
148 90 : json_error = pg_parse_json(lex, &sem);
149 65 : if (json_error != JSON_SUCCESS)
646 michael 150 1 : json_manifest_parse_failure(context, "parsing failed");
1101 rhaas 151 64 : if (parse.state != JM_EXPECT_EOF)
1101 rhaas 152 UBC 0 : json_manifest_parse_failure(context, "manifest ended unexpectedly");
153 :
154 : /* Verify the manifest checksum. */
1101 rhaas 155 CBC 64 : verify_manifest_checksum(&parse, buffer, size);
156 60 : }
157 :
158 : /*
159 : * Invoked at the start of each object in the JSON document.
160 : *
161 : * The document as a whole is expected to be an object; each file and each
162 : * WAL range is also expected to be an object. If we're anywhere else in the
163 : * document, it's an error.
164 : */
165 : static JsonParseErrorType
166 59171 : json_manifest_object_start(void *state)
167 : {
168 59171 : JsonManifestParseState *parse = state;
169 :
170 59171 : switch (parse->state)
171 : {
172 89 : case JM_EXPECT_TOPLEVEL_START:
173 89 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
174 89 : break;
175 59012 : case JM_EXPECT_FILES_NEXT:
176 59012 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
177 59012 : parse->pathname = NULL;
178 59012 : parse->encoded_pathname = NULL;
179 59012 : parse->size = NULL;
180 59012 : parse->algorithm = NULL;
181 59012 : parse->checksum = NULL;
182 59012 : break;
183 69 : case JM_EXPECT_WAL_RANGES_NEXT:
184 69 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
185 69 : parse->timeline = NULL;
186 69 : parse->start_lsn = NULL;
187 69 : parse->end_lsn = NULL;
188 69 : break;
189 1 : default:
190 1 : json_manifest_parse_failure(parse->context,
191 : "unexpected object start");
1101 rhaas 192 UBC 0 : break;
193 : }
194 :
119 tgl 195 GNC 59170 : return JSON_SUCCESS;
196 : }
1101 rhaas 197 ECB :
198 : /*
199 : * Invoked at the end of each object in the JSON document.
200 : *
201 : * The possible cases here are the same as for json_manifest_object_start.
202 : * There's nothing special to do at the end of the document, but when we
203 : * reach the end of an object representing a particular file or WAL range,
204 : * we must call json_manifest_finalize_file() to save the associated details.
205 : */
206 : static JsonParseErrorType
1101 rhaas 207 GIC 59145 : json_manifest_object_end(void *state)
208 : {
1101 rhaas 209 CBC 59145 : JsonManifestParseState *parse = state;
210 :
211 59145 : switch (parse->state)
212 : {
213 64 : case JM_EXPECT_TOPLEVEL_END:
1101 rhaas 214 GIC 64 : parse->state = JM_EXPECT_EOF;
1101 rhaas 215 CBC 64 : break;
216 59011 : case JM_EXPECT_THIS_FILE_FIELD:
217 59011 : json_manifest_finalize_file(parse);
218 59002 : parse->state = JM_EXPECT_FILES_NEXT;
219 59002 : break;
220 68 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
221 68 : json_manifest_finalize_wal_range(parse);
222 62 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
223 62 : break;
224 2 : default:
225 2 : json_manifest_parse_failure(parse->context,
1101 rhaas 226 ECB : "unexpected object end");
1101 rhaas 227 LBC 0 : break;
228 : }
229 :
119 tgl 230 GNC 59128 : return JSON_SUCCESS;
1101 rhaas 231 EUB : }
232 :
233 : /*
1101 rhaas 234 ECB : * Invoked at the start of each array in the JSON document.
235 : *
236 : * Within the toplevel object, the value associated with the "Files" key
237 : * should be an array. Similarly for the "WAL-Ranges" key. No other arrays
238 : * are expected.
239 : */
240 : static JsonParseErrorType
1101 rhaas 241 GIC 144 : json_manifest_array_start(void *state)
242 : {
243 144 : JsonManifestParseState *parse = state;
244 :
1101 rhaas 245 CBC 144 : switch (parse->state)
246 : {
247 74 : case JM_EXPECT_FILES_START:
1101 rhaas 248 GIC 74 : parse->state = JM_EXPECT_FILES_NEXT;
1101 rhaas 249 CBC 74 : break;
1101 rhaas 250 GIC 69 : case JM_EXPECT_WAL_RANGES_START:
1101 rhaas 251 CBC 69 : parse->state = JM_EXPECT_WAL_RANGES_NEXT;
252 69 : break;
253 1 : default:
254 1 : json_manifest_parse_failure(parse->context,
1101 rhaas 255 ECB : "unexpected array start");
1101 rhaas 256 LBC 0 : break;
1101 rhaas 257 ECB : }
258 :
119 tgl 259 GNC 143 : return JSON_SUCCESS;
1101 rhaas 260 ECB : }
261 :
1101 rhaas 262 EUB : /*
263 : * Invoked at the end of each array in the JSON document.
264 : *
1101 rhaas 265 ECB : * The cases here are analogous to those in json_manifest_array_start.
266 : */
267 : static JsonParseErrorType
1101 rhaas 268 GIC 126 : json_manifest_array_end(void *state)
269 : {
270 126 : JsonManifestParseState *parse = state;
271 :
272 126 : switch (parse->state)
273 : {
1101 rhaas 274 CBC 126 : case JM_EXPECT_FILES_NEXT:
275 : case JM_EXPECT_WAL_RANGES_NEXT:
276 126 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
1101 rhaas 277 GIC 126 : break;
1101 rhaas 278 LBC 0 : default:
1101 rhaas 279 UIC 0 : json_manifest_parse_failure(parse->context,
1101 rhaas 280 ECB : "unexpected array end");
1101 rhaas 281 UIC 0 : break;
1101 rhaas 282 ECB : }
283 :
119 tgl 284 GNC 126 : return JSON_SUCCESS;
1101 rhaas 285 ECB : }
1101 rhaas 286 EUB :
287 : /*
288 : * Invoked at the start of each object field in the JSON document.
289 : */
290 : static JsonParseErrorType
1101 rhaas 291 GIC 293592 : json_manifest_object_field_start(void *state, char *fname, bool isnull)
1101 rhaas 292 ECB : {
1101 rhaas 293 GIC 293592 : JsonManifestParseState *parse = state;
294 :
295 293592 : switch (parse->state)
296 : {
297 297 : case JM_EXPECT_TOPLEVEL_FIELD:
298 :
1101 rhaas 299 ECB : /*
300 : * Inside toplevel object. The version indicator should always be
301 : * the first field.
302 : */
1101 rhaas 303 CBC 297 : if (!parse->saw_version_field)
304 : {
305 87 : if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0)
1101 rhaas 306 GIC 1 : json_manifest_parse_failure(parse->context,
307 : "expected version indicator");
308 86 : parse->state = JM_EXPECT_VERSION_VALUE;
309 86 : parse->saw_version_field = true;
310 86 : break;
1101 rhaas 311 ECB : }
312 :
313 : /* Is this the list of files? */
1101 rhaas 314 CBC 210 : if (strcmp(fname, "Files") == 0)
315 : {
316 76 : parse->state = JM_EXPECT_FILES_START;
317 76 : break;
1101 rhaas 318 ECB : }
319 :
320 : /* Is this the list of WAL ranges? */
1101 rhaas 321 GIC 134 : if (strcmp(fname, "WAL-Ranges") == 0)
1101 rhaas 322 ECB : {
1101 rhaas 323 GIC 69 : parse->state = JM_EXPECT_WAL_RANGES_START;
1101 rhaas 324 CBC 69 : break;
1101 rhaas 325 ECB : }
326 :
327 : /* Is this the manifest checksum? */
1101 rhaas 328 GIC 65 : if (strcmp(fname, "Manifest-Checksum") == 0)
1101 rhaas 329 ECB : {
1101 rhaas 330 GIC 64 : parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE;
1101 rhaas 331 CBC 64 : break;
1101 rhaas 332 ECB : }
333 :
334 : /* It's not a field we recognize. */
1101 rhaas 335 GIC 1 : json_manifest_parse_failure(parse->context,
937 peter 336 ECB : "unrecognized top-level field");
1101 rhaas 337 UIC 0 : break;
1101 rhaas 338 ECB :
1101 rhaas 339 CBC 293096 : case JM_EXPECT_THIS_FILE_FIELD:
340 : /* Inside object for one file; which key have we got? */
1101 rhaas 341 GIC 293096 : if (strcmp(fname, "Path") == 0)
342 58043 : parse->file_field = JMFF_PATH;
1101 rhaas 343 CBC 235053 : else if (strcmp(fname, "Encoded-Path") == 0)
1101 rhaas 344 GIC 968 : parse->file_field = JMFF_ENCODED_PATH;
1101 rhaas 345 GBC 234085 : else if (strcmp(fname, "Size") == 0)
1101 rhaas 346 GIC 59008 : parse->file_field = JMFF_SIZE;
1101 rhaas 347 CBC 175077 : else if (strcmp(fname, "Last-Modified") == 0)
1101 rhaas 348 GIC 59001 : parse->file_field = JMFF_LAST_MODIFIED;
1101 rhaas 349 CBC 116076 : else if (strcmp(fname, "Checksum-Algorithm") == 0)
350 58037 : parse->file_field = JMFF_CHECKSUM_ALGORITHM;
351 58039 : else if (strcmp(fname, "Checksum") == 0)
352 58038 : parse->file_field = JMFF_CHECKSUM;
1101 rhaas 353 ECB : else
1101 rhaas 354 CBC 1 : json_manifest_parse_failure(parse->context,
1101 rhaas 355 ECB : "unexpected file field");
1101 rhaas 356 CBC 293095 : parse->state = JM_EXPECT_THIS_FILE_VALUE;
357 293095 : break;
1101 rhaas 358 ECB :
1101 rhaas 359 CBC 199 : case JM_EXPECT_THIS_WAL_RANGE_FIELD:
1101 rhaas 360 ECB : /* Inside object for one file; which key have we got? */
1101 rhaas 361 GIC 199 : if (strcmp(fname, "Timeline") == 0)
1101 rhaas 362 CBC 67 : parse->wal_range_field = JMWRF_TIMELINE;
1101 rhaas 363 GIC 132 : else if (strcmp(fname, "Start-LSN") == 0)
1101 rhaas 364 CBC 66 : parse->wal_range_field = JMWRF_START_LSN;
365 66 : else if (strcmp(fname, "End-LSN") == 0)
1101 rhaas 366 GIC 65 : parse->wal_range_field = JMWRF_END_LSN;
1101 rhaas 367 ECB : else
1101 rhaas 368 GIC 1 : json_manifest_parse_failure(parse->context,
937 peter 369 ECB : "unexpected WAL range field");
1101 rhaas 370 CBC 198 : parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE;
371 198 : break;
1101 rhaas 372 ECB :
1101 rhaas 373 LBC 0 : default:
374 0 : json_manifest_parse_failure(parse->context,
375 : "unexpected object field");
376 0 : break;
377 : }
378 :
119 tgl 379 GNC 293588 : return JSON_SUCCESS;
1101 rhaas 380 ECB : }
381 :
382 : /*
1101 rhaas 383 EUB : * Invoked at the start of each scalar in the JSON document.
384 : *
385 : * Object field names don't reach this code; those are handled by
386 : * json_manifest_object_field_start. When we're inside of the object for
387 : * a particular file or WAL range, that function will have noticed the name
388 : * of the field, and we'll get the corresponding value here. When we're in
1101 rhaas 389 ECB : * the toplevel object, the parse state itself tells us which field this is.
390 : *
391 : * In all cases except for PostgreSQL-Backup-Manifest-Version, which we
392 : * can just check on the spot, the goal here is just to save the value in
393 : * the parse state for later use. We don't actually do anything until we
394 : * reach either the end of the object representing this file, or the end
395 : * of the manifest, as the case may be.
396 : */
397 : static JsonParseErrorType
1101 rhaas 398 GIC 293444 : json_manifest_scalar(void *state, char *token, JsonTokenType tokentype)
399 : {
400 293444 : JsonManifestParseState *parse = state;
401 :
402 293444 : switch (parse->state)
403 : {
404 86 : case JM_EXPECT_VERSION_VALUE:
405 86 : if (strcmp(token, "1") != 0)
406 1 : json_manifest_parse_failure(parse->context,
407 : "unexpected manifest version");
1101 rhaas 408 CBC 85 : parse->state = JM_EXPECT_TOPLEVEL_FIELD;
1101 rhaas 409 GIC 85 : break;
1101 rhaas 410 ECB :
1101 rhaas 411 GIC 293095 : case JM_EXPECT_THIS_FILE_VALUE:
1101 rhaas 412 CBC 293095 : switch (parse->file_field)
413 : {
414 58043 : case JMFF_PATH:
415 58043 : parse->pathname = token;
416 58043 : break;
1101 rhaas 417 GIC 968 : case JMFF_ENCODED_PATH:
1101 rhaas 418 CBC 968 : parse->encoded_pathname = token;
419 968 : break;
1101 rhaas 420 GIC 59008 : case JMFF_SIZE:
1101 rhaas 421 CBC 59008 : parse->size = token;
422 59008 : break;
1101 rhaas 423 GIC 59001 : case JMFF_LAST_MODIFIED:
1101 rhaas 424 CBC 59001 : pfree(token); /* unused */
425 59001 : break;
426 58037 : case JMFF_CHECKSUM_ALGORITHM:
427 58037 : parse->algorithm = token;
428 58037 : break;
429 58038 : case JMFF_CHECKSUM:
430 58038 : parse->checksum = token;
431 58038 : break;
1101 rhaas 432 ECB : }
1101 rhaas 433 CBC 293095 : parse->state = JM_EXPECT_THIS_FILE_FIELD;
434 293095 : break;
1101 rhaas 435 ECB :
1101 rhaas 436 CBC 198 : case JM_EXPECT_THIS_WAL_RANGE_VALUE:
437 198 : switch (parse->wal_range_field)
1101 rhaas 438 ECB : {
1101 rhaas 439 CBC 67 : case JMWRF_TIMELINE:
440 67 : parse->timeline = token;
441 67 : break;
1101 rhaas 442 GIC 66 : case JMWRF_START_LSN:
1101 rhaas 443 CBC 66 : parse->start_lsn = token;
444 66 : break;
1101 rhaas 445 GIC 65 : case JMWRF_END_LSN:
1101 rhaas 446 CBC 65 : parse->end_lsn = token;
447 65 : break;
448 : }
449 198 : parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD;
450 198 : break;
1101 rhaas 451 ECB :
1101 rhaas 452 CBC 64 : case JM_EXPECT_MANIFEST_CHECKSUM_VALUE:
453 64 : parse->state = JM_EXPECT_TOPLEVEL_END;
454 64 : parse->manifest_checksum = token;
455 64 : break;
1101 rhaas 456 ECB :
1101 rhaas 457 CBC 1 : default:
1101 rhaas 458 GIC 1 : json_manifest_parse_failure(parse->context, "unexpected scalar");
1101 rhaas 459 LBC 0 : break;
1101 rhaas 460 ECB : }
461 :
119 tgl 462 GNC 293442 : return JSON_SUCCESS;
463 : }
1101 rhaas 464 ECB :
465 : /*
466 : * Do additional parsing and sanity-checking of the details gathered for one
467 : * file, and invoke the per-file callback so that the caller gets those
468 : * details. This happens for each file when the corresponding JSON object is
469 : * completely parsed.
470 : */
1101 rhaas 471 EUB : static void
1101 rhaas 472 GIC 59011 : json_manifest_finalize_file(JsonManifestParseState *parse)
473 : {
1101 rhaas 474 CBC 59011 : JsonManifestParseContext *context = parse->context;
475 : size_t size;
476 : char *ep;
477 : int checksum_string_length;
478 : pg_checksum_type checksum_type;
479 : int checksum_length;
480 : uint8 *checksum_payload;
481 :
482 : /* Pathname and size are required. */
1101 rhaas 483 GIC 59011 : if (parse->pathname == NULL && parse->encoded_pathname == NULL)
937 peter 484 CBC 1 : json_manifest_parse_failure(parse->context, "missing path name");
1101 rhaas 485 GIC 59010 : if (parse->pathname != NULL && parse->encoded_pathname != NULL)
1101 rhaas 486 CBC 1 : json_manifest_parse_failure(parse->context,
487 : "both path name and encoded path name");
1101 rhaas 488 GIC 59009 : if (parse->size == NULL)
489 1 : json_manifest_parse_failure(parse->context, "missing size");
490 59008 : if (parse->algorithm == NULL && parse->checksum != NULL)
491 1 : json_manifest_parse_failure(parse->context,
492 : "checksum without algorithm");
493 :
494 : /* Decode encoded pathname, if that's what we have. */
1101 rhaas 495 CBC 59007 : if (parse->encoded_pathname != NULL)
1101 rhaas 496 ECB : {
1101 rhaas 497 CBC 967 : int encoded_length = strlen(parse->encoded_pathname);
498 967 : int raw_length = encoded_length / 2;
499 :
500 967 : parse->pathname = palloc(raw_length + 1);
501 967 : if (encoded_length % 2 != 0 ||
502 966 : !hexdecode_string((uint8 *) parse->pathname,
1101 rhaas 503 ECB : parse->encoded_pathname,
504 : raw_length))
1101 rhaas 505 GIC 1 : json_manifest_parse_failure(parse->context,
506 : "could not decode file name");
1101 rhaas 507 CBC 966 : parse->pathname[raw_length] = '\0';
1101 rhaas 508 GIC 966 : pfree(parse->encoded_pathname);
1101 rhaas 509 CBC 966 : parse->encoded_pathname = NULL;
1101 rhaas 510 ECB : }
511 :
512 : /* Parse size. */
1101 rhaas 513 CBC 59006 : size = strtoul(parse->size, &ep, 10);
514 59006 : if (*ep)
1101 rhaas 515 GIC 1 : json_manifest_parse_failure(parse->context,
516 : "file size is not an integer");
1101 rhaas 517 ECB :
518 : /* Parse the checksum algorithm, if it's present. */
1101 rhaas 519 CBC 59005 : if (parse->algorithm == NULL)
520 968 : checksum_type = CHECKSUM_TYPE_NONE;
521 58037 : else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type))
1101 rhaas 522 GIC 1 : context->error_cb(context, "unrecognized checksum algorithm: \"%s\"",
523 : parse->algorithm);
524 :
1101 rhaas 525 ECB : /* Parse the checksum payload, if it's present. */
1101 rhaas 526 CBC 59004 : checksum_string_length = parse->checksum == NULL ? 0
527 58036 : : strlen(parse->checksum);
1101 rhaas 528 GIC 59004 : if (checksum_string_length == 0)
529 : {
530 968 : checksum_length = 0;
1101 rhaas 531 CBC 968 : checksum_payload = NULL;
1101 rhaas 532 ECB : }
533 : else
534 : {
1101 rhaas 535 GIC 58036 : checksum_length = checksum_string_length / 2;
536 58036 : checksum_payload = palloc(checksum_length);
537 58036 : if (checksum_string_length % 2 != 0 ||
1101 rhaas 538 CBC 58035 : !hexdecode_string(checksum_payload, parse->checksum,
1101 rhaas 539 ECB : checksum_length))
1101 rhaas 540 CBC 1 : context->error_cb(context,
541 : "invalid checksum for file \"%s\": \"%s\"",
1101 rhaas 542 ECB : parse->pathname, parse->checksum);
543 : }
544 :
545 : /* Invoke the callback with the details we've gathered. */
1101 rhaas 546 GIC 59003 : context->perfile_cb(context, parse->pathname, size,
1101 rhaas 547 ECB : checksum_type, checksum_length, checksum_payload);
548 :
549 : /* Free memory we no longer need. */
1101 rhaas 550 CBC 59002 : if (parse->size != NULL)
551 : {
552 59002 : pfree(parse->size);
1101 rhaas 553 GIC 59002 : parse->size = NULL;
554 : }
555 59002 : if (parse->algorithm != NULL)
556 : {
557 58035 : pfree(parse->algorithm);
1101 rhaas 558 CBC 58035 : parse->algorithm = NULL;
559 : }
1101 rhaas 560 GIC 59002 : if (parse->checksum != NULL)
561 : {
1101 rhaas 562 CBC 58035 : pfree(parse->checksum);
1101 rhaas 563 GIC 58035 : parse->checksum = NULL;
1101 rhaas 564 ECB : }
1101 rhaas 565 CBC 59002 : }
566 :
1101 rhaas 567 ECB : /*
568 : * Do additional parsing and sanity-checking of the details gathered for one
569 : * WAL range, and invoke the per-WAL-range callback so that the caller gets
570 : * those details. This happens for each WAL range when the corresponding JSON
571 : * object is completely parsed.
572 : */
573 : static void
1101 rhaas 574 CBC 68 : json_manifest_finalize_wal_range(JsonManifestParseState *parse)
1101 rhaas 575 ECB : {
1101 rhaas 576 GIC 68 : JsonManifestParseContext *context = parse->context;
1101 rhaas 577 ECB : TimeLineID tli;
578 : XLogRecPtr start_lsn,
579 : end_lsn;
580 : char *ep;
581 :
582 : /* Make sure all fields are present. */
1101 rhaas 583 GIC 68 : if (parse->timeline == NULL)
584 1 : json_manifest_parse_failure(parse->context, "missing timeline");
585 67 : if (parse->start_lsn == NULL)
1101 rhaas 586 CBC 1 : json_manifest_parse_failure(parse->context, "missing start LSN");
1101 rhaas 587 GIC 66 : if (parse->end_lsn == NULL)
1101 rhaas 588 CBC 1 : json_manifest_parse_failure(parse->context, "missing end LSN");
589 :
590 : /* Parse timeline. */
1101 rhaas 591 GIC 65 : tli = strtoul(parse->timeline, &ep, 10);
592 65 : if (*ep)
593 1 : json_manifest_parse_failure(parse->context,
594 : "timeline is not an integer");
1101 rhaas 595 CBC 64 : if (!parse_xlogrecptr(&start_lsn, parse->start_lsn))
596 1 : json_manifest_parse_failure(parse->context,
937 peter 597 ECB : "could not parse start LSN");
1101 rhaas 598 CBC 63 : if (!parse_xlogrecptr(&end_lsn, parse->end_lsn))
599 1 : json_manifest_parse_failure(parse->context,
937 peter 600 ECB : "could not parse end LSN");
601 :
602 : /* Invoke the callback with the details we've gathered. */
1101 rhaas 603 CBC 62 : context->perwalrange_cb(context, tli, start_lsn, end_lsn);
1101 rhaas 604 ECB :
605 : /* Free memory we no longer need. */
1101 rhaas 606 GIC 62 : if (parse->timeline != NULL)
1101 rhaas 607 ECB : {
1101 rhaas 608 CBC 62 : pfree(parse->timeline);
1101 rhaas 609 GIC 62 : parse->timeline = NULL;
1101 rhaas 610 ECB : }
1101 rhaas 611 CBC 62 : if (parse->start_lsn != NULL)
612 : {
1101 rhaas 613 GIC 62 : pfree(parse->start_lsn);
614 62 : parse->start_lsn = NULL;
1101 rhaas 615 ECB : }
1101 rhaas 616 GIC 62 : if (parse->end_lsn != NULL)
617 : {
1101 rhaas 618 CBC 62 : pfree(parse->end_lsn);
1101 rhaas 619 GIC 62 : parse->end_lsn = NULL;
1101 rhaas 620 ECB : }
1101 rhaas 621 CBC 62 : }
622 :
1101 rhaas 623 ECB : /*
624 : * Verify that the manifest checksum is correct.
625 : *
626 : * The last line of the manifest file is excluded from the manifest checksum,
627 : * because the last line is expected to contain the checksum that covers
628 : * the rest of the file.
629 : */
630 : static void
1101 rhaas 631 CBC 64 : verify_manifest_checksum(JsonManifestParseState *parse, char *buffer,
632 : size_t size)
1101 rhaas 633 ECB : {
1101 rhaas 634 GIC 64 : JsonManifestParseContext *context = parse->context;
635 : size_t i;
636 64 : size_t number_of_newlines = 0;
637 64 : size_t ultimate_newline = 0;
638 64 : size_t penultimate_newline = 0;
639 : pg_cryptohash_ctx *manifest_ctx;
640 : uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH];
641 : uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH];
642 :
1101 rhaas 643 ECB : /* Find the last two newlines in the file. */
1101 rhaas 644 GIC 8655763 : for (i = 0; i < size; ++i)
645 : {
1101 rhaas 646 CBC 8655699 : if (buffer[i] == '\n')
647 : {
648 59434 : ++number_of_newlines;
649 59434 : penultimate_newline = ultimate_newline;
650 59434 : ultimate_newline = i;
651 : }
652 : }
653 :
654 : /*
655 : * Make sure that the last newline is right at the end, and that there are
1101 rhaas 656 ECB : * at least two lines total. We need this to be true in order for the
657 : * following code, which computes the manifest checksum, to work properly.
658 : */
1101 rhaas 659 GIC 64 : if (number_of_newlines < 2)
1101 rhaas 660 CBC 1 : json_manifest_parse_failure(parse->context,
1101 rhaas 661 ECB : "expected at least 2 lines");
1101 rhaas 662 CBC 63 : if (ultimate_newline != size - 1)
1101 rhaas 663 GIC 1 : json_manifest_parse_failure(parse->context,
664 : "last line not newline-terminated");
665 :
666 : /* Checksum the rest. */
858 michael 667 62 : manifest_ctx = pg_cryptohash_create(PG_SHA256);
668 62 : if (manifest_ctx == NULL)
858 michael 669 UIC 0 : context->error_cb(context, "out of memory");
858 michael 670 GIC 62 : if (pg_cryptohash_init(manifest_ctx) < 0)
858 michael 671 LBC 0 : context->error_cb(context, "could not initialize checksum of manifest");
858 michael 672 CBC 62 : if (pg_cryptohash_update(manifest_ctx, (uint8 *) buffer, penultimate_newline + 1) < 0)
858 michael 673 UIC 0 : context->error_cb(context, "could not update checksum of manifest");
783 michael 674 CBC 62 : if (pg_cryptohash_final(manifest_ctx, manifest_checksum_actual,
783 michael 675 ECB : sizeof(manifest_checksum_actual)) < 0)
858 michael 676 UIC 0 : context->error_cb(context, "could not finalize checksum of manifest");
677 :
678 : /* Now verify it. */
1101 rhaas 679 CBC 62 : if (parse->manifest_checksum == NULL)
1101 rhaas 680 LBC 0 : context->error_cb(parse->context, "manifest has no checksum");
1101 rhaas 681 GBC 62 : if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 ||
1101 rhaas 682 CBC 62 : !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum,
1101 rhaas 683 EUB : PG_SHA256_DIGEST_LENGTH))
1101 rhaas 684 CBC 1 : context->error_cb(context, "invalid manifest checksum: \"%s\"",
1101 rhaas 685 EUB : parse->manifest_checksum);
1101 rhaas 686 CBC 61 : if (memcmp(manifest_checksum_actual, manifest_checksum_expected,
687 : PG_SHA256_DIGEST_LENGTH) != 0)
1101 rhaas 688 GBC 1 : context->error_cb(context, "manifest checksum mismatch");
858 michael 689 GIC 60 : pg_cryptohash_free(manifest_ctx);
1101 rhaas 690 60 : }
1101 rhaas 691 ECB :
1101 rhaas 692 EUB : /*
1101 rhaas 693 ECB : * Report a parse error.
694 : *
695 : * This is intended to be used for fairly low-level failures that probably
696 : * shouldn't occur unless somebody has deliberately constructed a bad manifest,
697 : * or unless the server is generating bad manifests due to some bug. msg should
698 : * be a short string giving some hint as to what the problem is.
699 : */
700 : static void
1101 rhaas 701 CBC 25 : json_manifest_parse_failure(JsonManifestParseContext *context, char *msg)
1101 rhaas 702 ECB : {
1101 rhaas 703 GIC 25 : context->error_cb(context, "could not parse backup manifest: %s", msg);
704 : }
705 :
/*
 * Convert a character which represents a hexadecimal digit to an integer.
 *
 * Both lower-case and upper-case letters are accepted.
 *
 * Returns the digit's value (0..15), or -1 if the character is not a
 * hexadecimal digit.
 */
static int
hexdecode_char(char c)
{
	switch (c)
	{
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			return c - '0';

		case 'a':
		case 'b':
		case 'c':
		case 'd':
		case 'e':
		case 'f':
			return 10 + (c - 'a');

		case 'A':
		case 'B':
		case 'C':
		case 'D':
		case 'E':
		case 'F':
			return 10 + (c - 'A');

		default:
			return -1;
	}
}
723 :
1101 rhaas 724 ECB : /*
725 : * Decode a hex string into a byte string, 2 hex chars per byte.
726 : *
727 : * Returns false if invalid characters are encountered; otherwise true.
728 : */
729 : static bool
1101 rhaas 730 CBC 59063 : hexdecode_string(uint8 *result, char *input, int nbytes)
1101 rhaas 731 ECB : {
732 : int i;
733 :
1101 rhaas 734 GIC 455458 : for (i = 0; i < nbytes; ++i)
735 : {
736 396396 : int n1 = hexdecode_char(input[i * 2]);
737 396396 : int n2 = hexdecode_char(input[i * 2 + 1]);
738 :
739 396396 : if (n1 < 0 || n2 < 0)
740 1 : return false;
741 396395 : result[i] = n1 * 16 + n2;
1101 rhaas 742 ECB : }
743 :
1101 rhaas 744 GIC 59062 : return true;
745 : }
1101 rhaas 746 ECB :
747 : /*
748 : * Parse an XLogRecPtr expressed using the usual string format.
749 : */
750 : static bool
1101 rhaas 751 CBC 127 : parse_xlogrecptr(XLogRecPtr *result, char *input)
1101 rhaas 752 ECB : {
753 : uint32 hi;
754 : uint32 lo;
755 :
1101 rhaas 756 CBC 127 : if (sscanf(input, "%X/%X", &hi, &lo) != 2)
1101 rhaas 757 GIC 2 : return false;
758 125 : *result = ((uint64) hi) << 32 | lo;
759 125 : return true;
760 : }
|