Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * backup_manifest.c
4 : * code for generating and sending a backup manifest
5 : *
6 : * Portions Copyright (c) 2010-2023, PostgreSQL Global Development Group
7 : *
8 : * IDENTIFICATION
9 : * src/backend/backup/backup_manifest.c
10 : *
11 : *-------------------------------------------------------------------------
12 : */
13 : #include "postgres.h"
14 :
15 : #include "access/timeline.h"
16 : #include "backup/backup_manifest.h"
17 : #include "backup/basebackup_sink.h"
18 : #include "libpq/libpq.h"
19 : #include "libpq/pqformat.h"
20 : #include "mb/pg_wchar.h"
21 : #include "utils/builtins.h"
22 : #include "utils/json.h"
23 :
24 : static void AppendStringToManifest(backup_manifest_info *manifest, const char *s);
25 :
26 : /*
27 : * Does the user want a backup manifest?
28 : *
29 : * It's simplest to always have a manifest_info object, so that we don't need
30 : * checks for NULL pointers in too many places. However, if the user doesn't
31 : * want a manifest, we set manifest->buffile to NULL.
32 : */
33 : static inline bool
1081 rhaas 34 CBC 119774 : IsManifestEnabled(backup_manifest_info *manifest)
35 : {
1084 36 119774 : return (manifest->buffile != NULL);
37 : }
38 :
/*
 * Convenience macro for appending printf-style formatted data to the backup
 * manifest.
 *
 * The arguments after "manifest" are handed to psprintf(); the resulting
 * string is passed to AppendStringToManifest() and then released with
 * pfree().
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement.  A bare { ... } block would
 * break when the macro is used as the body of an unbraced if that has an
 * else branch.
 */
#define AppendToManifest(manifest, ...) \
	do { \
		char *_manifest_s = psprintf(__VA_ARGS__); \
		AppendStringToManifest(manifest, _manifest_s); \
		pfree(_manifest_s); \
	} while (0)
48 :
49 : /*
50 : * Initialize state so that we can construct a backup manifest.
51 : *
52 : * NB: Although the checksum type for the data files is configurable, the
53 : * checksum for the manifest itself always uses SHA-256. See comments in
54 : * SendBackupManifest.
55 : */
56 : void
1081 57 126 : InitializeBackupManifest(backup_manifest_info *manifest,
58 : backup_manifest_option want_manifest,
59 : pg_checksum_type manifest_checksum_type)
60 : {
878 michael 61 126 : memset(manifest, 0, sizeof(backup_manifest_info));
62 126 : manifest->checksum_type = manifest_checksum_type;
63 :
1084 rhaas 64 126 : if (want_manifest == MANIFEST_OPTION_NO)
65 1 : manifest->buffile = NULL;
66 : else
67 : {
68 125 : manifest->buffile = BufFileCreateTemp(false);
858 michael 69 125 : manifest->manifest_ctx = pg_cryptohash_create(PG_SHA256);
70 125 : if (pg_cryptohash_init(manifest->manifest_ctx) < 0)
453 michael 71 UBC 0 : elog(ERROR, "failed to initialize checksum of backup manifest: %s",
72 : pg_cryptohash_error(manifest->manifest_ctx));
73 : }
74 :
1084 rhaas 75 CBC 126 : manifest->manifest_size = UINT64CONST(0);
76 126 : manifest->force_encode = (want_manifest == MANIFEST_OPTION_FORCE_ENCODE);
77 126 : manifest->first_file = true;
78 126 : manifest->still_checksumming = true;
79 :
80 126 : if (want_manifest != MANIFEST_OPTION_NO)
81 125 : AppendToManifest(manifest,
82 : "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n"
83 : "\"Files\": [");
84 126 : }
85 :
86 : /*
87 : * Free resources assigned to a backup manifest constructed.
88 : */
89 : void
858 michael 90 117 : FreeBackupManifest(backup_manifest_info *manifest)
91 : {
92 117 : pg_cryptohash_free(manifest->manifest_ctx);
93 117 : manifest->manifest_ctx = NULL;
94 117 : }
95 :
96 : /*
97 : * Add an entry to the backup manifest for a file.
98 : */
99 : void
1081 rhaas 100 119534 : AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
101 : const char *pathname, size_t size, pg_time_t mtime,
102 : pg_checksum_context *checksum_ctx)
103 : {
104 : char pathbuf[MAXPGPATH];
105 : int pathlen;
106 : StringInfoData buf;
107 :
1084 108 119534 : if (!IsManifestEnabled(manifest))
109 967 : return;
110 :
111 : /*
112 : * If this file is part of a tablespace, the pathname passed to this
113 : * function will be relative to the tar file that contains it. We want the
114 : * pathname relative to the data directory (ignoring the intermediate
115 : * symlink traversal).
116 : */
117 118567 : if (spcoid != NULL)
118 : {
119 16 : snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
120 : pathname);
121 16 : pathname = pathbuf;
122 : }
123 :
124 : /*
125 : * Each file's entry needs to be separated from any entry that follows by
126 : * a comma, but there's no comma before the first one or after the last
127 : * one. To make that work, adding a file to the manifest starts by
128 : * terminating the most recently added line, with a comma if appropriate,
129 : * but does not terminate the line inserted for this file.
130 : */
131 118567 : initStringInfo(&buf);
132 118567 : if (manifest->first_file)
133 : {
906 drowley 134 125 : appendStringInfoChar(&buf, '\n');
1084 rhaas 135 125 : manifest->first_file = false;
136 : }
137 : else
138 118442 : appendStringInfoString(&buf, ",\n");
139 :
140 : /*
141 : * Write the relative pathname to this file out to the manifest. The
142 : * manifest is always stored in UTF-8, so we have to encode paths that are
143 : * not valid in that encoding.
144 : */
145 118567 : pathlen = strlen(pathname);
146 236168 : if (!manifest->force_encode &&
147 117601 : pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
148 : {
149 117601 : appendStringInfoString(&buf, "{ \"Path\": ");
150 117601 : escape_json(&buf, pathname);
151 117601 : appendStringInfoString(&buf, ", ");
152 : }
153 : else
154 : {
155 966 : appendStringInfoString(&buf, "{ \"Encoded-Path\": \"");
598 michael 156 966 : enlargeStringInfo(&buf, 2 * pathlen);
157 1932 : buf.len += hex_encode(pathname, pathlen,
158 966 : &buf.data[buf.len]);
1084 rhaas 159 966 : appendStringInfoString(&buf, "\", ");
160 : }
161 :
162 118567 : appendStringInfo(&buf, "\"Size\": %zu, ", size);
163 :
164 : /*
165 : * Convert last modification time to a string and append it to the
166 : * manifest. Since it's not clear what time zone to use and since time
167 : * zone definitions can change, possibly causing confusion, use GMT
168 : * always.
169 : */
170 118567 : appendStringInfoString(&buf, "\"Last-Modified\": \"");
171 118567 : enlargeStringInfo(&buf, 128);
172 118567 : buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z",
173 118567 : pg_gmtime(&mtime));
906 drowley 174 118567 : appendStringInfoChar(&buf, '"');
175 :
176 : /* Add checksum information. */
1084 rhaas 177 118567 : if (checksum_ctx->type != CHECKSUM_TYPE_NONE)
178 : {
179 : uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH];
180 : int checksumlen;
181 :
182 117601 : checksumlen = pg_checksum_final(checksum_ctx, checksumbuf);
858 michael 183 117601 : if (checksumlen < 0)
858 michael 184 UBC 0 : elog(ERROR, "could not finalize checksum of file \"%s\"",
185 : pathname);
186 :
1084 rhaas 187 CBC 117601 : appendStringInfo(&buf,
188 : ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"",
189 : pg_checksum_type_name(checksum_ctx->type));
598 michael 190 117601 : enlargeStringInfo(&buf, 2 * checksumlen);
191 235202 : buf.len += hex_encode((char *) checksumbuf, checksumlen,
192 117601 : &buf.data[buf.len]);
906 drowley 193 117601 : appendStringInfoChar(&buf, '"');
194 : }
195 :
196 : /* Close out the object. */
1084 rhaas 197 118567 : appendStringInfoString(&buf, " }");
198 :
199 : /* OK, add it to the manifest. */
200 118567 : AppendStringToManifest(manifest, buf.data);
201 :
202 : /* Avoid leaking memory. */
203 118567 : pfree(buf.data);
204 : }
205 :
206 : /*
207 : * Add information about the WAL that will need to be replayed when restoring
208 : * this backup to the manifest.
209 : */
210 : void
1081 211 120 : AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
212 : TimeLineID starttli, XLogRecPtr endptr,
213 : TimeLineID endtli)
214 : {
215 : List *timelines;
216 : ListCell *lc;
1084 217 120 : bool first_wal_range = true;
218 120 : bool found_start_timeline = false;
219 :
220 120 : if (!IsManifestEnabled(manifest))
221 1 : return;
222 :
223 : /* Terminate the list of files. */
224 119 : AppendStringToManifest(manifest, "\n],\n");
225 :
226 : /* Read the timeline history for the ending timeline. */
227 119 : timelines = readTimeLineHistory(endtli);
228 :
229 : /* Start a list of LSN ranges. */
230 119 : AppendStringToManifest(manifest, "\"WAL-Ranges\": [\n");
231 :
232 119 : foreach(lc, timelines)
233 : {
234 119 : TimeLineHistoryEntry *entry = lfirst(lc);
235 : XLogRecPtr tl_beginptr;
236 :
237 : /*
238 : * We only care about timelines that were active during the backup.
239 : * Skip any that ended before the backup started. (Note that if
240 : * entry->end is InvalidXLogRecPtr, it means that the timeline has not
241 : * yet ended.)
242 : */
243 119 : if (!XLogRecPtrIsInvalid(entry->end) && entry->end < startptr)
1084 rhaas 244 UBC 0 : continue;
245 :
246 : /*
247 : * Because the timeline history file lists newer timelines before
248 : * older ones, the first timeline we encounter that is new enough to
249 : * matter ought to match the ending timeline of the backup.
250 : */
1084 rhaas 251 CBC 119 : if (first_wal_range && endtli != entry->tli)
1084 rhaas 252 UBC 0 : ereport(ERROR,
253 : errmsg("expected end timeline %u but found timeline %u",
254 : starttli, entry->tli));
255 :
256 : /*
257 : * If this timeline entry matches with the timeline on which the
258 : * backup started, WAL needs to be checked from the start LSN of the
259 : * backup. If this entry refers to a newer timeline, WAL needs to be
260 : * checked since the beginning of this timeline, so use the LSN where
261 : * the timeline began.
262 : */
594 michael 263 CBC 119 : if (starttli == entry->tli)
264 119 : tl_beginptr = startptr;
265 : else
266 : {
594 michael 267 UBC 0 : tl_beginptr = entry->begin;
268 :
269 : /*
270 : * If we reach a TLI that has no valid beginning LSN, there can't
271 : * be any more timelines in the history after this point, so we'd
272 : * better have arrived at the expected starting TLI. If not,
273 : * something's gone horribly wrong.
274 : */
275 0 : if (XLogRecPtrIsInvalid(entry->begin))
1084 rhaas 276 0 : ereport(ERROR,
277 : errmsg("expected start timeline %u but found timeline %u",
278 : starttli, entry->tli));
279 : }
280 :
1084 rhaas 281 CBC 119 : AppendToManifest(manifest,
282 : "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
283 : first_wal_range ? "" : ",\n",
284 : entry->tli,
285 : LSN_FORMAT_ARGS(tl_beginptr),
286 : LSN_FORMAT_ARGS(endptr));
287 :
288 119 : if (starttli == entry->tli)
289 : {
290 119 : found_start_timeline = true;
291 119 : break;
292 : }
293 :
1084 rhaas 294 UBC 0 : endptr = entry->begin;
295 0 : first_wal_range = false;
296 : }
297 :
298 : /*
299 : * The last entry in the timeline history for the ending timeline should
300 : * be the ending timeline itself. Verify that this is what we observed.
301 : */
1084 rhaas 302 CBC 119 : if (!found_start_timeline)
1084 rhaas 303 UBC 0 : ereport(ERROR,
304 : errmsg("start timeline %u not found in history of timeline %u",
305 : starttli, endtli));
306 :
307 : /* Terminate the list of WAL ranges. */
1084 rhaas 308 CBC 119 : AppendStringToManifest(manifest, "\n],\n");
309 : }
310 :
311 : /*
312 : * Finalize the backup manifest, and send it to the client.
313 : */
314 : void
332 tgl 315 120 : SendBackupManifest(backup_manifest_info *manifest, bbsink *sink)
316 : {
317 : uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH];
318 : char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH];
1084 rhaas 319 120 : size_t manifest_bytes_done = 0;
320 :
321 120 : if (!IsManifestEnabled(manifest))
322 1 : return;
323 :
324 : /*
325 : * Append manifest checksum, so that the problems with the manifest itself
326 : * can be detected.
327 : *
328 : * We always use SHA-256 for this, regardless of what algorithm is chosen
329 : * for checksumming the files. If we ever want to make the checksum
330 : * algorithm used for the manifest file variable, the client will need a
331 : * way to figure out which algorithm to use as close to the beginning of
332 : * the manifest file as possible, to avoid having to read the whole thing
333 : * twice.
334 : */
335 119 : manifest->still_checksumming = false;
783 michael 336 119 : if (pg_cryptohash_final(manifest->manifest_ctx, checksumbuf,
337 : sizeof(checksumbuf)) < 0)
453 michael 338 UBC 0 : elog(ERROR, "failed to finalize checksum of backup manifest: %s",
339 : pg_cryptohash_error(manifest->manifest_ctx));
1084 rhaas 340 CBC 119 : AppendStringToManifest(manifest, "\"Manifest-Checksum\": \"");
341 :
598 michael 342 119 : hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf);
343 119 : checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0';
344 :
1084 rhaas 345 119 : AppendStringToManifest(manifest, checksumstringbuf);
346 119 : AppendStringToManifest(manifest, "\"}\n");
347 :
348 : /*
349 : * We've written all the data to the manifest file. Rewind the file so
350 : * that we can read it all back.
351 : */
11 peter 352 GNC 119 : if (BufFileSeek(manifest->buffile, 0, 0, SEEK_SET))
1084 rhaas 353 UBC 0 : ereport(ERROR,
354 : (errcode_for_file_access(),
355 : errmsg("could not rewind temporary file")));
356 :
357 :
358 : /*
359 : * Send the backup manifest.
360 : */
520 rhaas 361 CBC 119 : bbsink_begin_manifest(sink);
1084 362 726 : while (manifest_bytes_done < manifest->manifest_size)
363 : {
364 : size_t bytes_to_read;
1084 rhaas 365 ECB :
520 rhaas 366 GIC 607 : bytes_to_read = Min(sink->bbs_buffer_length,
1084 rhaas 367 ECB : manifest->manifest_size - manifest_bytes_done);
83 peter 368 GNC 607 : BufFileReadExact(manifest->buffile, sink->bbs_buffer, bytes_to_read);
520 rhaas 369 GIC 607 : bbsink_manifest_contents(sink, bytes_to_read);
1084 370 607 : manifest_bytes_done += bytes_to_read;
371 : }
520 372 119 : bbsink_end_manifest(sink);
373 :
374 : /* Release resources */
1084 rhaas 375 CBC 119 : BufFileClose(manifest->buffile);
376 : }
1081 rhaas 377 ECB :
378 : /*
379 : * Append a cstring to the manifest.
380 : */
381 : static void
100 peter 382 GNC 119525 : AppendStringToManifest(backup_manifest_info *manifest, const char *s)
1081 rhaas 383 EUB : {
1081 rhaas 384 GIC 119525 : int len = strlen(s);
385 :
1081 rhaas 386 CBC 119525 : Assert(manifest != NULL);
387 119525 : if (manifest->still_checksumming)
858 michael 388 ECB : {
858 michael 389 GIC 119168 : if (pg_cryptohash_update(manifest->manifest_ctx, (uint8 *) s, len) < 0)
453 michael 390 UIC 0 : elog(ERROR, "failed to update checksum of backup manifest: %s",
391 : pg_cryptohash_error(manifest->manifest_ctx));
392 : }
1027 tmunro 393 GIC 119525 : BufFileWrite(manifest->buffile, s, len);
1081 rhaas 394 119525 : manifest->manifest_size += len;
395 119525 : }
|