Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * bbstreamer_tar.c
4 : : *
5 : : * This module implements three types of tar processing. A tar parser
6 : : * expects unlabelled chunks of data (e.g. BBSTREAMER_UNKNOWN) and splits
7 : : * it into labelled chunks (any other value of bbstreamer_archive_context).
8 : : * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 : : * and produces a tarfile, optionally replacing member headers and trailers
10 : : * so that upstream bbstreamer objects can perform surgery on the tarfile
11 : : * contents without knowing the details of the tar format. A tar terminator
12 : : * just adds two blocks of NUL bytes to the end of the file, since older
13 : : * server versions produce files with this terminator omitted.
14 : : *
15 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
16 : : *
17 : : * IDENTIFICATION
18 : : * src/bin/pg_basebackup/bbstreamer_tar.c
19 : : *-------------------------------------------------------------------------
20 : : */
21 : :
22 : : #include "postgres_fe.h"
23 : :
24 : : #include <time.h>
25 : :
26 : : #include "bbstreamer.h"
27 : : #include "common/logging.h"
28 : : #include "pgtar.h"
29 : :
30 : : typedef struct bbstreamer_tar_parser
31 : : {
32 : : bbstreamer base;
33 : : bbstreamer_archive_context next_context;
34 : : bbstreamer_member member;
35 : : size_t file_bytes_sent;
36 : : size_t pad_bytes_expected;
37 : : } bbstreamer_tar_parser;
38 : :
39 : : typedef struct bbstreamer_tar_archiver
40 : : {
41 : : bbstreamer base;
42 : : bool rearchive_member;
43 : : } bbstreamer_tar_archiver;
44 : :
45 : : static void bbstreamer_tar_parser_content(bbstreamer *streamer,
46 : : bbstreamer_member *member,
47 : : const char *data, int len,
48 : : bbstreamer_archive_context context);
49 : : static void bbstreamer_tar_parser_finalize(bbstreamer *streamer);
50 : : static void bbstreamer_tar_parser_free(bbstreamer *streamer);
51 : : static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer);
52 : :
53 : : const bbstreamer_ops bbstreamer_tar_parser_ops = {
54 : : .content = bbstreamer_tar_parser_content,
55 : : .finalize = bbstreamer_tar_parser_finalize,
56 : : .free = bbstreamer_tar_parser_free
57 : : };
58 : :
59 : : static void bbstreamer_tar_archiver_content(bbstreamer *streamer,
60 : : bbstreamer_member *member,
61 : : const char *data, int len,
62 : : bbstreamer_archive_context context);
63 : : static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer);
64 : : static void bbstreamer_tar_archiver_free(bbstreamer *streamer);
65 : :
66 : : const bbstreamer_ops bbstreamer_tar_archiver_ops = {
67 : : .content = bbstreamer_tar_archiver_content,
68 : : .finalize = bbstreamer_tar_archiver_finalize,
69 : : .free = bbstreamer_tar_archiver_free
70 : : };
71 : :
72 : : static void bbstreamer_tar_terminator_content(bbstreamer *streamer,
73 : : bbstreamer_member *member,
74 : : const char *data, int len,
75 : : bbstreamer_archive_context context);
76 : : static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer);
77 : : static void bbstreamer_tar_terminator_free(bbstreamer *streamer);
78 : :
79 : : const bbstreamer_ops bbstreamer_tar_terminator_ops = {
80 : : .content = bbstreamer_tar_terminator_content,
81 : : .finalize = bbstreamer_tar_terminator_finalize,
82 : : .free = bbstreamer_tar_terminator_free
83 : : };
84 : :
85 : : /*
86 : : * Create a bbstreamer that can parse a stream of content as tar data.
87 : : *
88 : : * The input should be a series of BBSTREAMER_UNKNOWN chunks; the bbstreamer
89 : : * specified by 'next' will receive a series of typed chunks, as per the
90 : : * conventions described in bbstreamer.h.
91 : : */
92 : : extern bbstreamer *
891 rhaas@postgresql.org 93 :CBC 152 : bbstreamer_tar_parser_new(bbstreamer *next)
94 : : {
95 : : bbstreamer_tar_parser *streamer;
96 : :
97 : 152 : streamer = palloc0(sizeof(bbstreamer_tar_parser));
98 : 152 : *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
99 : : &bbstreamer_tar_parser_ops;
100 : 152 : streamer->base.bbs_next = next;
101 : 152 : initStringInfo(&streamer->base.bbs_buffer);
102 : 152 : streamer->next_context = BBSTREAMER_MEMBER_HEADER;
103 : :
104 : 152 : return &streamer->base;
105 : : }
106 : :
107 : : /*
108 : : * Parse unknown content as tar data.
109 : : */
110 : : static void
111 : 350686 : bbstreamer_tar_parser_content(bbstreamer *streamer, bbstreamer_member *member,
112 : : const char *data, int len,
113 : : bbstreamer_archive_context context)
114 : : {
115 : 350686 : bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
116 : : size_t nbytes;
117 : :
118 : : /* Expect unparsed input. */
119 [ - + ]: 350686 : Assert(member == NULL);
120 [ - + ]: 350686 : Assert(context == BBSTREAMER_UNKNOWN);
121 : :
122 [ + + ]: 706886 : while (len > 0)
123 : : {
124 [ + + + + : 356351 : switch (mystreamer->next_context)
- ]
125 : : {
126 : 125370 : case BBSTREAMER_MEMBER_HEADER:
127 : :
128 : : /*
129 : : * If we're expecting an archive member header, accumulate a
130 : : * full block of data before doing anything further.
131 : : */
132 [ - + ]: 125370 : if (!bbstreamer_buffer_until(streamer, &data, &len,
133 : : TAR_BLOCK_SIZE))
891 rhaas@postgresql.org 134 :UBC 0 : return;
135 : :
136 : : /*
137 : : * Now we can process the header and get ready to process the
138 : : * file contents; however, we might find out that what we
139 : : * thought was the next file header is actually the start of
140 : : * the archive trailer. Switch modes accordingly.
141 : : */
891 rhaas@postgresql.org 142 [ + + ]:CBC 125370 : if (bbstreamer_tar_header(mystreamer))
143 : : {
144 [ + + ]: 125221 : if (mystreamer->member.size == 0)
145 : : {
146 : : /* No content; trailer is zero-length. */
147 : 24483 : bbstreamer_content(mystreamer->base.bbs_next,
148 : : &mystreamer->member,
149 : : NULL, 0,
150 : : BBSTREAMER_MEMBER_TRAILER);
151 : :
152 : : /* Expect next header. */
153 : 24483 : mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
154 : : }
155 : : else
156 : : {
157 : : /* Expect contents. */
158 : 100738 : mystreamer->next_context = BBSTREAMER_MEMBER_CONTENTS;
159 : : }
160 : 125221 : mystreamer->base.bbs_buffer.len = 0;
161 : 125221 : mystreamer->file_bytes_sent = 0;
162 : : }
163 : : else
164 : 149 : mystreamer->next_context = BBSTREAMER_ARCHIVE_TRAILER;
165 : 125370 : break;
166 : :
167 : 224438 : case BBSTREAMER_MEMBER_CONTENTS:
168 : :
169 : : /*
170 : : * Send as much content as we have, but not more than the
171 : : * remaining file length.
172 : : */
173 [ - + ]: 224438 : Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174 : 224438 : nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 : 224438 : nbytes = Min(nbytes, len);
176 [ - + ]: 224438 : Assert(nbytes > 0);
177 : 224438 : bbstreamer_content(mystreamer->base.bbs_next,
178 : : &mystreamer->member,
179 : : data, nbytes,
180 : : BBSTREAMER_MEMBER_CONTENTS);
181 : 224438 : mystreamer->file_bytes_sent += nbytes;
182 : 224438 : data += nbytes;
183 : 224438 : len -= nbytes;
184 : :
185 : : /*
186 : : * If we've not yet sent the whole file, then there's more
187 : : * content to come; otherwise, it's time to expect the file
188 : : * trailer.
189 : : */
190 [ - + ]: 224438 : Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 [ + + ]: 224438 : if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 : : {
193 [ + + ]: 100735 : if (mystreamer->pad_bytes_expected == 0)
194 : : {
195 : : /* Trailer is zero-length. */
196 : 94343 : bbstreamer_content(mystreamer->base.bbs_next,
197 : : &mystreamer->member,
198 : : NULL, 0,
199 : : BBSTREAMER_MEMBER_TRAILER);
200 : :
201 : : /* Expect next header. */
202 : 94343 : mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
203 : : }
204 : : else
205 : : {
206 : : /* Trailer is not zero-length. */
207 : 6392 : mystreamer->next_context = BBSTREAMER_MEMBER_TRAILER;
208 : : }
209 : 100735 : mystreamer->base.bbs_buffer.len = 0;
210 : : }
211 : 224438 : break;
212 : :
213 : 6392 : case BBSTREAMER_MEMBER_TRAILER:
214 : :
215 : : /*
216 : : * If we're expecting an archive member trailer, accumulate
217 : : * the expected number of padding bytes before sending
218 : : * anything onward.
219 : : */
220 [ - + ]: 6392 : if (!bbstreamer_buffer_until(streamer, &data, &len,
221 : 6392 : mystreamer->pad_bytes_expected))
891 rhaas@postgresql.org 222 :UBC 0 : return;
223 : :
224 : : /* OK, now we can send it. */
891 rhaas@postgresql.org 225 :CBC 6392 : bbstreamer_content(mystreamer->base.bbs_next,
226 : : &mystreamer->member,
227 : 6392 : data, mystreamer->pad_bytes_expected,
228 : : BBSTREAMER_MEMBER_TRAILER);
229 : :
230 : : /* Expect next file header. */
231 : 6392 : mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
232 : 6392 : mystreamer->base.bbs_buffer.len = 0;
233 : 6392 : break;
234 : :
235 : 151 : case BBSTREAMER_ARCHIVE_TRAILER:
236 : :
237 : : /*
238 : : * We've seen an end-of-archive indicator, so anything more is
239 : : * buffered and sent as part of the archive trailer. But we
240 : : * don't expect more than 2 blocks.
241 : : */
242 : 151 : bbstreamer_buffer_bytes(streamer, &data, &len, len);
243 [ - + ]: 151 : if (len > 2 * TAR_BLOCK_SIZE)
737 tgl@sss.pgh.pa.us 244 :UBC 0 : pg_fatal("tar file trailer exceeds 2 blocks");
891 rhaas@postgresql.org 245 :CBC 151 : return;
246 : :
891 rhaas@postgresql.org 247 :UBC 0 : default:
248 : : /* Shouldn't happen. */
737 tgl@sss.pgh.pa.us 249 : 0 : pg_fatal("unexpected state while parsing tar archive");
250 : : }
251 : : }
252 : : }
253 : :
254 : : /*
255 : : * Parse a file header within a tar stream.
256 : : *
257 : : * The return value is true if we found a file header and passed it on to the
258 : : * next bbstreamer; it is false if we have reached the archive trailer.
259 : : */
260 : : static bool
891 rhaas@postgresql.org 261 :CBC 125370 : bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer)
262 : : {
263 : 125370 : bool has_nonzero_byte = false;
264 : : int i;
265 : 125370 : bbstreamer_member *member = &mystreamer->member;
266 : 125370 : char *buffer = mystreamer->base.bbs_buffer.data;
267 : :
268 [ - + ]: 125370 : Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
269 : :
270 : : /* Check whether we've got a block of all zero bytes. */
271 [ + + ]: 201658 : for (i = 0; i < TAR_BLOCK_SIZE; ++i)
272 : : {
273 [ + + ]: 201509 : if (buffer[i] != '\0')
274 : : {
275 : 125221 : has_nonzero_byte = true;
276 : 125221 : break;
277 : : }
278 : : }
279 : :
280 : : /*
281 : : * If the entire block was zeros, this is the end of the archive, not the
282 : : * start of the next file.
283 : : */
284 [ + + ]: 125370 : if (!has_nonzero_byte)
285 : 149 : return false;
286 : :
287 : : /*
288 : : * Parse key fields out of the header.
289 : : */
257 rhaas@postgresql.org 290 :GNC 125221 : strlcpy(member->pathname, &buffer[TAR_OFFSET_NAME], MAXPGPATH);
891 rhaas@postgresql.org 291 [ - + ]:CBC 125221 : if (member->pathname[0] == '\0')
737 tgl@sss.pgh.pa.us 292 :UBC 0 : pg_fatal("tar member has empty name");
257 rhaas@postgresql.org 293 :GNC 125221 : member->size = read_tar_number(&buffer[TAR_OFFSET_SIZE], 12);
294 : 125221 : member->mode = read_tar_number(&buffer[TAR_OFFSET_MODE], 8);
295 : 125221 : member->uid = read_tar_number(&buffer[TAR_OFFSET_UID], 8);
296 : 125221 : member->gid = read_tar_number(&buffer[TAR_OFFSET_GID], 8);
297 : 125221 : member->is_directory =
298 : 125221 : (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_DIRECTORY);
299 : 125221 : member->is_link =
300 : 125221 : (buffer[TAR_OFFSET_TYPEFLAG] == TAR_FILETYPE_SYMLINK);
891 rhaas@postgresql.org 301 [ + + ]:CBC 125221 : if (member->is_link)
257 rhaas@postgresql.org 302 :GNC 14 : strlcpy(member->linktarget, &buffer[TAR_OFFSET_LINKNAME], 100);
303 : :
304 : : /* Compute number of padding bytes. */
891 rhaas@postgresql.org 305 :CBC 125221 : mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
306 : :
307 : : /* Forward the entire header to the next bbstreamer. */
308 : 125221 : bbstreamer_content(mystreamer->base.bbs_next, member,
309 : : buffer, TAR_BLOCK_SIZE,
310 : : BBSTREAMER_MEMBER_HEADER);
311 : :
312 : 125221 : return true;
313 : : }
314 : :
315 : : /*
316 : : * End-of-stream processing for a tar parser.
317 : : */
318 : : static void
319 : 149 : bbstreamer_tar_parser_finalize(bbstreamer *streamer)
320 : : {
321 : 149 : bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
322 : :
323 [ - + ]: 149 : if (mystreamer->next_context != BBSTREAMER_ARCHIVE_TRAILER &&
891 rhaas@postgresql.org 324 [ # # ]:UBC 0 : (mystreamer->next_context != BBSTREAMER_MEMBER_HEADER ||
325 [ # # ]: 0 : mystreamer->base.bbs_buffer.len > 0))
737 tgl@sss.pgh.pa.us 326 : 0 : pg_fatal("COPY stream ended before last file was finished");
327 : :
328 : : /* Send the archive trailer, even if empty. */
891 rhaas@postgresql.org 329 :CBC 149 : bbstreamer_content(streamer->bbs_next, NULL,
330 : 149 : streamer->bbs_buffer.data, streamer->bbs_buffer.len,
331 : : BBSTREAMER_ARCHIVE_TRAILER);
332 : :
333 : : /* Now finalize successor. */
334 : 149 : bbstreamer_finalize(streamer->bbs_next);
335 : 149 : }
336 : :
337 : : /*
338 : : * Free memory associated with a tar parser.
339 : : */
340 : : static void
341 : 149 : bbstreamer_tar_parser_free(bbstreamer *streamer)
342 : : {
343 : 149 : pfree(streamer->bbs_buffer.data);
344 : 149 : bbstreamer_free(streamer->bbs_next);
345 : 149 : }
346 : :
347 : : /*
348 : : * Create an bbstreamer that can generate a tar archive.
349 : : *
350 : : * This is intended to be usable either for generating a brand-new tar archive
351 : : * or for modifying one on the fly. The input should be a series of typed
352 : : * chunks (i.e. not BBSTREAMER_UNKNOWN). See also the comments for
353 : : * bbstreamer_tar_parser_content.
354 : : */
355 : : extern bbstreamer *
891 rhaas@postgresql.org 356 :UBC 0 : bbstreamer_tar_archiver_new(bbstreamer *next)
357 : : {
358 : : bbstreamer_tar_archiver *streamer;
359 : :
360 : 0 : streamer = palloc0(sizeof(bbstreamer_tar_archiver));
361 : 0 : *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
362 : : &bbstreamer_tar_archiver_ops;
363 : 0 : streamer->base.bbs_next = next;
364 : :
365 : 0 : return &streamer->base;
366 : : }
367 : :
368 : : /*
369 : : * Fix up the stream of input chunks to create a valid tar file.
370 : : *
371 : : * If a BBSTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
372 : : * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
373 : : * passed through without change. Any other size is a fatal error (and
374 : : * indicates a bug).
375 : : *
376 : : * Whenever a new BBSTREAMER_MEMBER_HEADER chunk is constructed, the
377 : : * corresponding BBSTREAMER_MEMBER_TRAILER chunk is also constructed from
378 : : * scratch. Specifically, we construct a block of zero bytes sufficient to
379 : : * pad out to a block boundary, as required by the tar format. Other
380 : : * BBSTREAMER_MEMBER_TRAILER chunks are passed through without change.
381 : : *
382 : : * Any BBSTREAMER_MEMBER_CONTENTS chunks are passed through without change.
383 : : *
384 : : * The BBSTREAMER_ARCHIVE_TRAILER chunk is replaced with two
385 : : * blocks of zero bytes. Not all tar programs require this, but apparently
386 : : * some do. The server does not supply this trailer. If no archive trailer is
387 : : * present, one will be added by bbstreamer_tar_parser_finalize.
388 : : */
389 : : static void
390 : 0 : bbstreamer_tar_archiver_content(bbstreamer *streamer,
391 : : bbstreamer_member *member,
392 : : const char *data, int len,
393 : : bbstreamer_archive_context context)
394 : : {
395 : 0 : bbstreamer_tar_archiver *mystreamer = (bbstreamer_tar_archiver *) streamer;
396 : : char buffer[2 * TAR_BLOCK_SIZE];
397 : :
398 [ # # ]: 0 : Assert(context != BBSTREAMER_UNKNOWN);
399 : :
400 [ # # # # ]: 0 : if (context == BBSTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
401 : : {
402 [ # # ]: 0 : Assert(len == 0);
403 : :
404 : : /* Replace zero-length tar header with a newly constructed one. */
405 : 0 : tarCreateHeader(buffer, member->pathname, NULL,
406 : : member->size, member->mode, member->uid, member->gid,
407 : : time(NULL));
408 : 0 : data = buffer;
409 : 0 : len = TAR_BLOCK_SIZE;
410 : :
411 : : /* Also make a note to replace padding, in case size changed. */
412 : 0 : mystreamer->rearchive_member = true;
413 : : }
414 [ # # ]: 0 : else if (context == BBSTREAMER_MEMBER_TRAILER &&
415 [ # # ]: 0 : mystreamer->rearchive_member)
416 : 0 : {
417 : 0 : int pad_bytes = tarPaddingBytesRequired(member->size);
418 : :
419 : : /* Also replace padding, if we regenerated the header. */
420 : 0 : memset(buffer, 0, pad_bytes);
421 : 0 : data = buffer;
422 : 0 : len = pad_bytes;
423 : :
424 : : /* Don't do this again unless we replace another header. */
425 : 0 : mystreamer->rearchive_member = false;
426 : : }
427 [ # # ]: 0 : else if (context == BBSTREAMER_ARCHIVE_TRAILER)
428 : : {
429 : : /* Trailer should always be two blocks of zero bytes. */
430 : 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
431 : 0 : data = buffer;
432 : 0 : len = 2 * TAR_BLOCK_SIZE;
433 : : }
434 : :
435 : 0 : bbstreamer_content(streamer->bbs_next, member, data, len, context);
436 : 0 : }
437 : :
438 : : /*
439 : : * End-of-stream processing for a tar archiver.
440 : : */
441 : : static void
442 : 0 : bbstreamer_tar_archiver_finalize(bbstreamer *streamer)
443 : : {
444 : 0 : bbstreamer_finalize(streamer->bbs_next);
445 : 0 : }
446 : :
447 : : /*
448 : : * Free memory associated with a tar archiver.
449 : : */
450 : : static void
451 : 0 : bbstreamer_tar_archiver_free(bbstreamer *streamer)
452 : : {
453 : 0 : bbstreamer_free(streamer->bbs_next);
454 : 0 : pfree(streamer);
455 : 0 : }
456 : :
457 : : /*
458 : : * Create a bbstreamer that blindly adds two blocks of NUL bytes to the
459 : : * end of an incomplete tarfile that the server might send us.
460 : : */
461 : : bbstreamer *
888 462 : 0 : bbstreamer_tar_terminator_new(bbstreamer *next)
463 : : {
464 : : bbstreamer *streamer;
465 : :
466 : 0 : streamer = palloc0(sizeof(bbstreamer));
467 : 0 : *((const bbstreamer_ops **) &streamer->bbs_ops) =
468 : : &bbstreamer_tar_terminator_ops;
469 : 0 : streamer->bbs_next = next;
470 : :
471 : 0 : return streamer;
472 : : }
473 : :
474 : : /*
475 : : * Pass all the content through without change.
476 : : */
477 : : static void
478 : 0 : bbstreamer_tar_terminator_content(bbstreamer *streamer,
479 : : bbstreamer_member *member,
480 : : const char *data, int len,
481 : : bbstreamer_archive_context context)
482 : : {
483 : : /* Expect unparsed input. */
484 [ # # ]: 0 : Assert(member == NULL);
485 [ # # ]: 0 : Assert(context == BBSTREAMER_UNKNOWN);
486 : :
487 : : /* Just forward it. */
488 : 0 : bbstreamer_content(streamer->bbs_next, member, data, len, context);
489 : 0 : }
490 : :
491 : : /*
492 : : * At the end, blindly add the two blocks of NUL bytes which the server fails
493 : : * to supply.
494 : : */
495 : : static void
496 : 0 : bbstreamer_tar_terminator_finalize(bbstreamer *streamer)
497 : : {
498 : : char buffer[2 * TAR_BLOCK_SIZE];
499 : :
500 : 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
501 : 0 : bbstreamer_content(streamer->bbs_next, NULL, buffer,
502 : : 2 * TAR_BLOCK_SIZE, BBSTREAMER_UNKNOWN);
503 : 0 : bbstreamer_finalize(streamer->bbs_next);
504 : 0 : }
505 : :
506 : : /*
507 : : * Free memory associated with a tar terminator.
508 : : */
509 : : static void
510 : 0 : bbstreamer_tar_terminator_free(bbstreamer *streamer)
511 : : {
512 : 0 : bbstreamer_free(streamer->bbs_next);
513 : 0 : pfree(streamer);
514 : 0 : }
|