Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bbstreamer_tar.c
4 : *
5 : * This module implements three types of tar processing. A tar parser
6 : * expects unlabelled chunks of data (i.e. BBSTREAMER_UNKNOWN) and splits
7 : * it into labelled chunks (any other value of bbstreamer_archive_context).
8 : * A tar archiver does the reverse: it takes a bunch of labelled chunks
9 : * and produces a tarfile, optionally replacing member headers and trailers
10 : * so that upstream bbstreamer objects can perform surgery on the tarfile
11 : * contents without knowing the details of the tar format. A tar terminator
12 : * just adds two blocks of NUL bytes to the end of the file, since older
13 : * server versions produce files with this terminator omitted.
14 : *
15 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
16 : *
17 : * IDENTIFICATION
18 : * src/bin/pg_basebackup/bbstreamer_tar.c
19 : *-------------------------------------------------------------------------
20 : */
21 :
22 : #include "postgres_fe.h"
23 :
24 : #include <time.h>
25 :
26 : #include "bbstreamer.h"
27 : #include "common/logging.h"
28 : #include "pgtar.h"
29 :
30 : typedef struct bbstreamer_tar_parser
31 : {
32 : bbstreamer base;
33 : bbstreamer_archive_context next_context;
34 : bbstreamer_member member;
35 : size_t file_bytes_sent;
36 : size_t pad_bytes_expected;
37 : } bbstreamer_tar_parser;
38 :
39 : typedef struct bbstreamer_tar_archiver
40 : {
41 : bbstreamer base;
42 : bool rearchive_member;
43 : } bbstreamer_tar_archiver;
44 :
45 : static void bbstreamer_tar_parser_content(bbstreamer *streamer,
46 : bbstreamer_member *member,
47 : const char *data, int len,
48 : bbstreamer_archive_context context);
49 : static void bbstreamer_tar_parser_finalize(bbstreamer *streamer);
50 : static void bbstreamer_tar_parser_free(bbstreamer *streamer);
51 : static bool bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer);
52 :
53 : const bbstreamer_ops bbstreamer_tar_parser_ops = {
54 : .content = bbstreamer_tar_parser_content,
55 : .finalize = bbstreamer_tar_parser_finalize,
56 : .free = bbstreamer_tar_parser_free
57 : };
58 :
59 : static void bbstreamer_tar_archiver_content(bbstreamer *streamer,
60 : bbstreamer_member *member,
61 : const char *data, int len,
62 : bbstreamer_archive_context context);
63 : static void bbstreamer_tar_archiver_finalize(bbstreamer *streamer);
64 : static void bbstreamer_tar_archiver_free(bbstreamer *streamer);
65 :
66 : const bbstreamer_ops bbstreamer_tar_archiver_ops = {
67 : .content = bbstreamer_tar_archiver_content,
68 : .finalize = bbstreamer_tar_archiver_finalize,
69 : .free = bbstreamer_tar_archiver_free
70 : };
71 :
72 : static void bbstreamer_tar_terminator_content(bbstreamer *streamer,
73 : bbstreamer_member *member,
74 : const char *data, int len,
75 : bbstreamer_archive_context context);
76 : static void bbstreamer_tar_terminator_finalize(bbstreamer *streamer);
77 : static void bbstreamer_tar_terminator_free(bbstreamer *streamer);
78 :
79 : const bbstreamer_ops bbstreamer_tar_terminator_ops = {
80 : .content = bbstreamer_tar_terminator_content,
81 : .finalize = bbstreamer_tar_terminator_finalize,
82 : .free = bbstreamer_tar_terminator_free
83 : };
84 :
85 : /*
86 : * Create a bbstreamer that can parse a stream of content as tar data.
87 : *
88 : * The input should be a series of BBSTREAMER_UNKNOWN chunks; the bbstreamer
89 : * specified by 'next' will receive a series of typed chunks, as per the
90 : * conventions described in bbstreamer.h.
91 : */
92 : extern bbstreamer *
520 rhaas 93 CBC 113 : bbstreamer_tar_parser_new(bbstreamer *next)
94 : {
95 : bbstreamer_tar_parser *streamer;
96 :
97 113 : streamer = palloc0(sizeof(bbstreamer_tar_parser));
98 113 : *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
99 : &bbstreamer_tar_parser_ops;
100 113 : streamer->base.bbs_next = next;
101 113 : initStringInfo(&streamer->base.bbs_buffer);
102 113 : streamer->next_context = BBSTREAMER_MEMBER_HEADER;
103 :
104 113 : return &streamer->base;
105 : }
106 :
107 : /*
108 : * Parse unknown content as tar data.
109 : */
110 : static void
111 291127 : bbstreamer_tar_parser_content(bbstreamer *streamer, bbstreamer_member *member,
112 : const char *data, int len,
113 : bbstreamer_archive_context context)
114 : {
115 291127 : bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
116 : size_t nbytes;
117 :
118 : /* Expect unparsed input. */
119 291127 : Assert(member == NULL);
120 291127 : Assert(context == BBSTREAMER_UNKNOWN);
121 :
122 587670 : while (len > 0)
123 : {
124 296653 : switch (mystreamer->next_context)
125 : {
126 98893 : case BBSTREAMER_MEMBER_HEADER:
127 :
128 : /*
129 : * If we're expecting an archive member header, accumulate a
130 : * full block of data before doing anything further.
131 : */
132 98893 : if (!bbstreamer_buffer_until(streamer, &data, &len,
133 : TAR_BLOCK_SIZE))
520 rhaas 134 UBC 0 : return;
135 :
136 : /*
137 : * Now we can process the header and get ready to process the
138 : * file contents; however, we might find out that what we
139 : * thought was the next file header is actually the start of
140 : * the archive trailer. Switch modes accordingly.
141 : */
520 rhaas 142 CBC 98893 : if (bbstreamer_tar_header(mystreamer))
143 : {
144 98783 : if (mystreamer->member.size == 0)
145 : {
146 : /* No content; trailer is zero-length. */
147 19267 : bbstreamer_content(mystreamer->base.bbs_next,
148 : &mystreamer->member,
149 : NULL, 0,
150 : BBSTREAMER_MEMBER_TRAILER);
151 :
152 : /* Expect next header. */
153 19267 : mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
154 : }
155 : else
156 : {
157 : /* Expect contents. */
158 79516 : mystreamer->next_context = BBSTREAMER_MEMBER_CONTENTS;
159 : }
160 98783 : mystreamer->base.bbs_buffer.len = 0;
161 98783 : mystreamer->file_bytes_sent = 0;
162 : }
163 : else
164 110 : mystreamer->next_context = BBSTREAMER_ARCHIVE_TRAILER;
165 98893 : break;
166 :
167 196236 : case BBSTREAMER_MEMBER_CONTENTS:
168 :
169 : /*
170 : * Send as much content as we have, but not more than the
171 : * remaining file length.
172 : */
173 196236 : Assert(mystreamer->file_bytes_sent < mystreamer->member.size);
174 196236 : nbytes = mystreamer->member.size - mystreamer->file_bytes_sent;
175 196236 : nbytes = Min(nbytes, len);
176 196236 : Assert(nbytes > 0);
177 196236 : bbstreamer_content(mystreamer->base.bbs_next,
178 : &mystreamer->member,
179 : data, nbytes,
180 : BBSTREAMER_MEMBER_CONTENTS);
181 196236 : mystreamer->file_bytes_sent += nbytes;
182 196236 : data += nbytes;
183 196236 : len -= nbytes;
184 :
185 : /*
186 : * If we've not yet sent the whole file, then there's more
187 : * content to come; otherwise, it's time to expect the file
188 : * trailer.
189 : */
190 196236 : Assert(mystreamer->file_bytes_sent <= mystreamer->member.size);
191 196236 : if (mystreamer->file_bytes_sent == mystreamer->member.size)
192 : {
193 79514 : if (mystreamer->pad_bytes_expected == 0)
194 : {
195 : /* Trailer is zero-length. */
196 78100 : bbstreamer_content(mystreamer->base.bbs_next,
197 : &mystreamer->member,
198 : NULL, 0,
199 : BBSTREAMER_MEMBER_TRAILER);
200 :
201 : /* Expect next header. */
202 78100 : mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
203 : }
204 : else
205 : {
206 : /* Trailer is not zero-length. */
207 1414 : mystreamer->next_context = BBSTREAMER_MEMBER_TRAILER;
208 : }
209 79514 : mystreamer->base.bbs_buffer.len = 0;
210 : }
211 196236 : break;
212 :
213 1414 : case BBSTREAMER_MEMBER_TRAILER:
214 :
215 : /*
216 : * If we're expecting an archive member trailer, accumulate
217 : * the expected number of padding bytes before sending
218 : * anything onward.
219 : */
220 1414 : if (!bbstreamer_buffer_until(streamer, &data, &len,
221 1414 : mystreamer->pad_bytes_expected))
520 rhaas 222 UBC 0 : return;
223 :
224 : /* OK, now we can send it. */
520 rhaas 225 CBC 1414 : bbstreamer_content(mystreamer->base.bbs_next,
226 : &mystreamer->member,
227 1414 : data, mystreamer->pad_bytes_expected,
228 : BBSTREAMER_MEMBER_TRAILER);
229 :
230 : /* Expect next file header. */
231 1414 : mystreamer->next_context = BBSTREAMER_MEMBER_HEADER;
232 1414 : mystreamer->base.bbs_buffer.len = 0;
233 1414 : break;
234 :
235 110 : case BBSTREAMER_ARCHIVE_TRAILER:
236 :
237 : /*
238 : * We've seen an end-of-archive indicator, so anything more is
239 : * buffered and sent as part of the archive trailer. But we
240 : * don't expect more than 2 blocks.
241 : */
242 110 : bbstreamer_buffer_bytes(streamer, &data, &len, len);
243 110 : if (len > 2 * TAR_BLOCK_SIZE)
366 tgl 244 UBC 0 : pg_fatal("tar file trailer exceeds 2 blocks");
520 rhaas 245 CBC 110 : return;
246 :
520 rhaas 247 UBC 0 : default:
248 : /* Shouldn't happen. */
366 tgl 249 0 : pg_fatal("unexpected state while parsing tar archive");
250 : }
251 : }
252 : }
253 :
254 : /*
255 : * Parse a file header within a tar stream.
256 : *
257 : * The return value is true if we found a file header and passed it on to the
258 : * next bbstreamer; it is false if we have reached the archive trailer.
259 : */
260 : static bool
520 rhaas 261 CBC 98893 : bbstreamer_tar_header(bbstreamer_tar_parser *mystreamer)
262 : {
263 98893 : bool has_nonzero_byte = false;
264 : int i;
265 98893 : bbstreamer_member *member = &mystreamer->member;
266 98893 : char *buffer = mystreamer->base.bbs_buffer.data;
267 :
268 98893 : Assert(mystreamer->base.bbs_buffer.len == TAR_BLOCK_SIZE);
269 :
270 : /* Check whether we've got a block of all zero bytes. */
271 155213 : for (i = 0; i < TAR_BLOCK_SIZE; ++i)
272 : {
273 155103 : if (buffer[i] != '\0')
274 : {
275 98783 : has_nonzero_byte = true;
276 98783 : break;
277 : }
278 : }
279 :
280 : /*
281 : * If the entire block was zeros, this is the end of the archive, not the
282 : * start of the next file.
283 : */
284 98893 : if (!has_nonzero_byte)
285 110 : return false;
286 :
287 : /*
288 : * Parse key fields out of the header.
289 : *
290 : * FIXME: It's terrible that we use hard-coded values here instead of some
291 : * more principled approach. It's been like this for a long time, but we
292 : * ought to do better.
293 : */
294 98783 : strlcpy(member->pathname, &buffer[0], MAXPGPATH);
295 98783 : if (member->pathname[0] == '\0')
366 tgl 296 UBC 0 : pg_fatal("tar member has empty name");
520 rhaas 297 CBC 98783 : member->size = read_tar_number(&buffer[124], 12);
298 98783 : member->mode = read_tar_number(&buffer[100], 8);
299 98783 : member->uid = read_tar_number(&buffer[108], 8);
300 98783 : member->gid = read_tar_number(&buffer[116], 8);
301 98783 : member->is_directory = (buffer[156] == '5');
302 98783 : member->is_link = (buffer[156] == '2');
303 98783 : if (member->is_link)
304 13 : strlcpy(member->linktarget, &buffer[157], 100);
305 :
306 : /* Compute number of padding bytes. */
307 98783 : mystreamer->pad_bytes_expected = tarPaddingBytesRequired(member->size);
308 :
309 : /* Forward the entire header to the next bbstreamer. */
310 98783 : bbstreamer_content(mystreamer->base.bbs_next, member,
311 : buffer, TAR_BLOCK_SIZE,
312 : BBSTREAMER_MEMBER_HEADER);
313 :
314 98783 : return true;
315 : }
316 :
317 : /*
318 : * End-of-stream processing for a tar parser.
319 : */
320 : static void
321 110 : bbstreamer_tar_parser_finalize(bbstreamer *streamer)
322 : {
323 110 : bbstreamer_tar_parser *mystreamer = (bbstreamer_tar_parser *) streamer;
324 :
325 110 : if (mystreamer->next_context != BBSTREAMER_ARCHIVE_TRAILER &&
520 rhaas 326 UBC 0 : (mystreamer->next_context != BBSTREAMER_MEMBER_HEADER ||
327 0 : mystreamer->base.bbs_buffer.len > 0))
366 tgl 328 0 : pg_fatal("COPY stream ended before last file was finished");
329 :
330 : /* Send the archive trailer, even if empty. */
520 rhaas 331 CBC 110 : bbstreamer_content(streamer->bbs_next, NULL,
332 110 : streamer->bbs_buffer.data, streamer->bbs_buffer.len,
333 : BBSTREAMER_ARCHIVE_TRAILER);
334 :
335 : /* Now finalize successor. */
336 110 : bbstreamer_finalize(streamer->bbs_next);
337 110 : }
338 :
339 : /*
340 : * Free memory associated with a tar parser.
341 : */
342 : static void
343 110 : bbstreamer_tar_parser_free(bbstreamer *streamer)
344 : {
345 110 : pfree(streamer->bbs_buffer.data);
346 110 : bbstreamer_free(streamer->bbs_next);
347 110 : }
348 :
349 : /*
350 : * Create an bbstreamer that can generate a tar archive.
351 : *
352 : * This is intended to be usable either for generating a brand-new tar archive
353 : * or for modifying one on the fly. The input should be a series of typed
354 : * chunks (i.e. not BBSTREAMER_UNKNOWN). See also the comments for
355 : * bbstreamer_tar_parser_content.
356 : */
357 : extern bbstreamer *
520 rhaas 358 UBC 0 : bbstreamer_tar_archiver_new(bbstreamer *next)
359 : {
360 : bbstreamer_tar_archiver *streamer;
361 :
362 0 : streamer = palloc0(sizeof(bbstreamer_tar_archiver));
363 0 : *((const bbstreamer_ops **) &streamer->base.bbs_ops) =
364 : &bbstreamer_tar_archiver_ops;
365 0 : streamer->base.bbs_next = next;
366 :
367 0 : return &streamer->base;
368 : }
369 :
370 : /*
371 : * Fix up the stream of input chunks to create a valid tar file.
372 : *
373 : * If a BBSTREAMER_MEMBER_HEADER chunk is of size 0, it is replaced with a
374 : * newly-constructed tar header. If it is of size TAR_BLOCK_SIZE, it is
375 : * passed through without change. Any other size is a fatal error (and
376 : * indicates a bug).
377 : *
378 : * Whenever a new BBSTREAMER_MEMBER_HEADER chunk is constructed, the
379 : * corresponding BBSTREAMER_MEMBER_TRAILER chunk is also constructed from
380 : * scratch. Specifically, we construct a block of zero bytes sufficient to
381 : * pad out to a block boundary, as required by the tar format. Other
382 : * BBSTREAMER_MEMBER_TRAILER chunks are passed through without change.
383 : *
384 : * Any BBSTREAMER_MEMBER_CONTENTS chunks are passed through without change.
385 : *
386 : * The BBSTREAMER_ARCHIVE_TRAILER chunk is replaced with two
387 : * blocks of zero bytes. Not all tar programs require this, but apparently
388 : * some do. The server does not supply this trailer. If no archive trailer is
389 : * present, one will be added by bbstreamer_tar_parser_finalize.
390 : */
391 : static void
392 0 : bbstreamer_tar_archiver_content(bbstreamer *streamer,
393 : bbstreamer_member *member,
394 : const char *data, int len,
395 : bbstreamer_archive_context context)
396 : {
397 0 : bbstreamer_tar_archiver *mystreamer = (bbstreamer_tar_archiver *) streamer;
398 : char buffer[2 * TAR_BLOCK_SIZE];
399 :
400 0 : Assert(context != BBSTREAMER_UNKNOWN);
401 :
402 0 : if (context == BBSTREAMER_MEMBER_HEADER && len != TAR_BLOCK_SIZE)
403 : {
404 0 : Assert(len == 0);
405 :
406 : /* Replace zero-length tar header with a newly constructed one. */
407 0 : tarCreateHeader(buffer, member->pathname, NULL,
408 : member->size, member->mode, member->uid, member->gid,
409 : time(NULL));
410 0 : data = buffer;
411 0 : len = TAR_BLOCK_SIZE;
412 :
413 : /* Also make a note to replace padding, in case size changed. */
414 0 : mystreamer->rearchive_member = true;
415 : }
416 0 : else if (context == BBSTREAMER_MEMBER_TRAILER &&
417 0 : mystreamer->rearchive_member)
418 0 : {
419 0 : int pad_bytes = tarPaddingBytesRequired(member->size);
420 :
421 : /* Also replace padding, if we regenerated the header. */
422 0 : memset(buffer, 0, pad_bytes);
423 0 : data = buffer;
424 0 : len = pad_bytes;
425 :
426 : /* Don't do this again unless we replace another header. */
427 0 : mystreamer->rearchive_member = false;
428 : }
429 0 : else if (context == BBSTREAMER_ARCHIVE_TRAILER)
430 : {
431 : /* Trailer should always be two blocks of zero bytes. */
432 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
433 0 : data = buffer;
434 0 : len = 2 * TAR_BLOCK_SIZE;
435 : }
436 :
437 0 : bbstreamer_content(streamer->bbs_next, member, data, len, context);
438 0 : }
439 :
440 : /*
441 : * End-of-stream processing for a tar archiver.
442 : */
443 : static void
444 0 : bbstreamer_tar_archiver_finalize(bbstreamer *streamer)
445 : {
446 0 : bbstreamer_finalize(streamer->bbs_next);
447 0 : }
448 :
449 : /*
450 : * Free memory associated with a tar archiver.
451 : */
452 : static void
453 0 : bbstreamer_tar_archiver_free(bbstreamer *streamer)
454 : {
455 0 : bbstreamer_free(streamer->bbs_next);
456 0 : pfree(streamer);
457 0 : }
458 :
459 : /*
460 : * Create a bbstreamer that blindly adds two blocks of NUL bytes to the
461 : * end of an incomplete tarfile that the server might send us.
462 : */
463 : bbstreamer *
517 464 0 : bbstreamer_tar_terminator_new(bbstreamer *next)
465 : {
466 : bbstreamer *streamer;
467 :
468 0 : streamer = palloc0(sizeof(bbstreamer));
469 0 : *((const bbstreamer_ops **) &streamer->bbs_ops) =
470 : &bbstreamer_tar_terminator_ops;
471 0 : streamer->bbs_next = next;
472 :
473 0 : return streamer;
474 : }
475 :
476 : /*
477 : * Pass all the content through without change.
478 : */
479 : static void
480 0 : bbstreamer_tar_terminator_content(bbstreamer *streamer,
481 : bbstreamer_member *member,
482 : const char *data, int len,
483 : bbstreamer_archive_context context)
484 : {
485 : /* Expect unparsed input. */
486 0 : Assert(member == NULL);
487 0 : Assert(context == BBSTREAMER_UNKNOWN);
488 :
489 : /* Just forward it. */
490 0 : bbstreamer_content(streamer->bbs_next, member, data, len, context);
491 0 : }
492 :
493 : /*
494 : * At the end, blindly add the two blocks of NUL bytes which the server fails
495 : * to supply.
496 : */
497 : static void
498 0 : bbstreamer_tar_terminator_finalize(bbstreamer *streamer)
499 : {
500 : char buffer[2 * TAR_BLOCK_SIZE];
501 :
502 0 : memset(buffer, 0, 2 * TAR_BLOCK_SIZE);
503 0 : bbstreamer_content(streamer->bbs_next, NULL, buffer,
504 : 2 * TAR_BLOCK_SIZE, BBSTREAMER_UNKNOWN);
505 0 : bbstreamer_finalize(streamer->bbs_next);
506 0 : }
507 :
508 : /*
509 : * Free memory associated with a tar terminator.
510 : */
511 : static void
512 0 : bbstreamer_tar_terminator_free(bbstreamer *streamer)
513 : {
514 0 : bbstreamer_free(streamer->bbs_next);
515 0 : pfree(streamer);
516 0 : }
|