Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * filemap.c
4 : : * A data structure for keeping track of files that have changed.
5 : : *
6 : : * This source file contains the logic to decide what to do with different
7 : : * kinds of files, and the data structure to support it. Before modifying
8 : : * anything, pg_rewind collects information about all the files and their
9 : : * attributes in the target and source data directories. It also scans the
10 : : * WAL log in the target, and collects information about data blocks that
11 : : * were changed. All this information is stored in a hash table, using the
12 : : * file path relative to the root of the data directory as the key.
13 : : *
14 : : * After collecting all the information required, the decide_file_actions()
15 : : * function scans the hash table and decides what action needs to be taken
16 : : * for each file. Finally, it sorts the array to the final order that the
17 : : * actions should be executed in.
18 : : *
19 : : * Copyright (c) 2013-2024, PostgreSQL Global Development Group
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : :
24 : : #include "postgres_fe.h"
25 : :
26 : : #include <sys/stat.h>
27 : : #include <unistd.h>
28 : :
29 : : #include "catalog/pg_tablespace_d.h"
30 : : #include "common/file_utils.h"
31 : : #include "common/hashfn_unstable.h"
32 : : #include "common/string.h"
33 : : #include "datapagemap.h"
34 : : #include "filemap.h"
35 : : #include "pg_rewind.h"
36 : :
37 : : /*
38 : : * Define a hash table which we can use to store information about the files
39 : : * appearing in source and target systems.
40 : : */
41 : : #define SH_PREFIX filehash
42 : : #define SH_ELEMENT_TYPE file_entry_t
43 : : #define SH_KEY_TYPE const char *
44 : : #define SH_KEY path
45 : : #define SH_HASH_KEY(tb, key) hash_string(key)
46 : : #define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0)
47 : : #define SH_SCOPE static inline
48 : : #define SH_RAW_ALLOCATOR pg_malloc0
49 : : #define SH_DECLARE
50 : : #define SH_DEFINE
51 : : #include "lib/simplehash.h"
52 : :
53 : : #define FILEHASH_INITIAL_SIZE 1000
54 : :
55 : : static filehash_hash *filehash;
56 : :
57 : : static bool isRelDataFile(const char *path);
58 : : static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
59 : : BlockNumber segno);
60 : :
61 : : static file_entry_t *insert_filehash_entry(const char *path);
62 : : static file_entry_t *lookup_filehash_entry(const char *path);
63 : : static int final_filemap_cmp(const void *a, const void *b);
64 : : static bool check_file_excluded(const char *path, bool is_source);
65 : :
/*
 * One element of an exclusion list, used to leave certain files out when
 * rewinding.
 *
 * "name" is the file name to look for.  With "match_prefix" set, every file
 * whose name begins with "name" is excluded; without it, the file name has
 * to be exactly "name".
 */
struct exclude_list_item
{
	const char *name;			/* file name, or file name prefix */
	bool		match_prefix;	/* is "name" only a prefix? */
};
77 : :
/*
 * Directories whose contents pg_rewind does not process, because the server
 * removes or recreates them when it starts.  The list is terminated by a
 * NULL entry.
 *
 * Note: this list and the file list should be kept in sync with what
 * basebackup.c provides.  Unlike basebackup.c, some values are spelled out
 * literally here because their definitions live in backend-only headers, so
 * keeping the lists in sync is a best-effort affair.
 */
static const char *const excludeDirContents[] =
{
	/*
	 * Temporary statistics files.  PG_STAT_TMP_DIR must be skipped because
	 * extensions like pg_stat_statements store data there.
	 */
	"pg_stat_tmp",				/* defined as PG_STAT_TMP_DIR */

	/*
	 * Backing up the contents of this directory is generally not useful,
	 * even if the intention is to restore to another primary.  See
	 * backup.sgml for a more detailed description.
	 */
	"pg_replslot",

	/* Removed on startup, see dsm_cleanup_for_mmap(). */
	"pg_dynshmem",				/* defined as PG_DYNSHMEM_DIR */

	/* Removed on startup, see AsyncShmemInit(). */
	"pg_notify",

	/*
	 * Old contents are loaded for possible debugging, but normal operation
	 * does not need them; see SerialInit().
	 */
	"pg_serial",

	/* Removed on startup, see DeleteAllExportedSnapshotFiles(). */
	"pg_snapshots",

	/* Zeroed on startup, see StartupSUBTRANS(). */
	"pg_subtrans",

	/* list terminator */
	NULL
};
123 : :
124 : : /*
125 : : * List of files excluded from filemap processing. Files are excluded
126 : : * if their prefix match.
127 : : */
128 : : static const struct exclude_list_item excludeFiles[] =
129 : : {
130 : : /* Skip auto conf temporary file. */
131 : : {"postgresql.auto.conf.tmp", false}, /* defined as PG_AUTOCONF_FILENAME */
132 : :
133 : : /* Skip current log file temporary file */
134 : : {"current_logfiles.tmp", false}, /* defined as
135 : : * LOG_METAINFO_DATAFILE_TMP */
136 : :
137 : : /* Skip relation cache because it is rebuilt on startup */
138 : : {"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
139 : :
140 : : /*
141 : : * If there is a backup_label or tablespace_map file, it indicates that a
142 : : * recovery failed and this cluster probably can't be rewound, but exclude
143 : : * them anyway if they are found.
144 : : */
145 : : {"backup_label", false}, /* defined as BACKUP_LABEL_FILE */
146 : : {"tablespace_map", false}, /* defined as TABLESPACE_MAP */
147 : :
148 : : /*
149 : : * If there's a backup_manifest, it belongs to a backup that was used to
150 : : * start this server. It is *not* correct for this backup. Our
151 : : * backup_manifest is injected into the backup separately if users want
152 : : * it.
153 : : */
154 : : {"backup_manifest", false},
155 : :
156 : : {"postmaster.pid", false},
157 : : {"postmaster.opts", false},
158 : :
159 : : /* end of list */
160 : : {NULL, false}
161 : : };
162 : :
163 : : /*
164 : : * Initialize the hash table for the file map.
165 : : */
166 : : void
1257 heikki.linnakangas@i 167 :CBC 13 : filehash_init(void)
168 : : {
169 : 13 : filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
3310 170 : 13 : }
171 : :
172 : : /* Look up entry for 'path', creating a new one if it doesn't exist */
173 : : static file_entry_t *
1257 174 : 29864 : insert_filehash_entry(const char *path)
175 : : {
176 : : file_entry_t *entry;
177 : : bool found;
178 : :
179 : 29864 : entry = filehash_insert(filehash, path, &found);
180 [ + + ]: 29864 : if (!found)
181 : : {
182 : 15614 : entry->path = pg_strdup(path);
183 : 15614 : entry->isrelfile = isRelDataFile(path);
184 : :
185 : 15614 : entry->target_exists = false;
186 : 15614 : entry->target_type = FILE_TYPE_UNDEFINED;
187 : 15614 : entry->target_size = 0;
188 : 15614 : entry->target_link_target = NULL;
189 : 15614 : entry->target_pages_to_overwrite.bitmap = NULL;
190 : 15614 : entry->target_pages_to_overwrite.bitmapsize = 0;
191 : :
192 : 15614 : entry->source_exists = false;
193 : 15614 : entry->source_type = FILE_TYPE_UNDEFINED;
194 : 15614 : entry->source_size = 0;
195 : 15614 : entry->source_link_target = NULL;
196 : :
197 : 15614 : entry->action = FILE_ACTION_UNDECIDED;
198 : : }
199 : :
200 : 29864 : return entry;
201 : : }
202 : :
203 : : static file_entry_t *
204 : 84483 : lookup_filehash_entry(const char *path)
205 : : {
206 : 84483 : return filehash_lookup(filehash, path);
207 : : }
208 : :
209 : : /*
210 : : * Callback for processing source file list.
211 : : *
212 : : * This is called once for every file in the source server. We record the
213 : : * type and size of the file, so that decide_file_action() can later decide what
214 : : * to do with it.
215 : : */
216 : : void
217 : 14948 : process_source_file(const char *path, file_type_t type, size_t size,
218 : : const char *link_target)
219 : : {
220 : : file_entry_t *entry;
221 : :
222 : : /*
223 : : * Pretend that pg_wal is a directory, even if it's really a symlink. We
224 : : * don't want to mess with the symlink itself, nor complain if it's a
225 : : * symlink in source but not in target or vice versa.
226 : : */
2733 rhaas@postgresql.org 227 [ + + - + ]: 14948 : if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
3177 heikki.linnakangas@i 228 :UBC 0 : type = FILE_TYPE_DIRECTORY;
229 : :
230 : : /*
231 : : * sanity check: a filename that looks like a data file better be a
232 : : * regular file
233 : : */
3310 heikki.linnakangas@i 234 [ + + - + ]:CBC 14948 : if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
1840 peter@eisentraut.org 235 :UBC 0 : pg_fatal("data file \"%s\" in source is not a regular file", path);
236 : :
237 : : /* Remember this source file */
1257 heikki.linnakangas@i 238 :CBC 14948 : entry = insert_filehash_entry(path);
239 [ - + ]: 14948 : if (entry->source_exists)
1257 heikki.linnakangas@i 240 :UBC 0 : pg_fatal("duplicate source file \"%s\"", path);
1257 heikki.linnakangas@i 241 :CBC 14948 : entry->source_exists = true;
242 : 14948 : entry->source_type = type;
243 : 14948 : entry->source_size = size;
244 [ + + ]: 14948 : entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
3310 245 : 14948 : }
246 : :
247 : : /*
248 : : * Callback for processing target file list.
249 : : *
250 : : * Record the type and size of the file, like process_source_file() does.
251 : : */
252 : : void
1257 253 : 14916 : process_target_file(const char *path, file_type_t type, size_t size,
254 : : const char *link_target)
255 : : {
256 : : file_entry_t *entry;
257 : :
258 : : /*
259 : : * Do not apply any exclusion filters here. This has advantage to remove
260 : : * from the target data folder all paths which have been filtered out from
261 : : * the source data folder when processing the source files.
262 : : */
263 : :
264 : : /*
265 : : * Like in process_source_file, pretend that pg_wal is always a directory.
266 : : */
2733 rhaas@postgresql.org 267 [ + + + + ]: 14916 : if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
3177 heikki.linnakangas@i 268 : 2 : type = FILE_TYPE_DIRECTORY;
269 : :
270 : : /* Remember this target file */
1257 271 : 14916 : entry = insert_filehash_entry(path);
272 [ - + ]: 14916 : if (entry->target_exists)
1257 heikki.linnakangas@i 273 :UBC 0 : pg_fatal("duplicate source file \"%s\"", path);
1257 heikki.linnakangas@i 274 :CBC 14916 : entry->target_exists = true;
275 : 14916 : entry->target_type = type;
276 : 14916 : entry->target_size = size;
277 [ + + ]: 14916 : entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
3310 278 : 14916 : }
279 : :
280 : : /*
281 : : * This callback gets called while we read the WAL in the target, for every
282 : : * block that has changed in the target system. It decides if the given
283 : : * 'blkno' in the target relfile needs to be overwritten from the source, and
284 : : * if so, records it in 'target_pages_to_overwrite' bitmap.
285 : : *
286 : : * NOTE: All the files on both systems must have already been added to the
287 : : * hash table!
288 : : */
289 : : void
648 rhaas@postgresql.org 290 : 84483 : process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
291 : : BlockNumber blkno)
292 : : {
293 : : char *path;
294 : : file_entry_t *entry;
295 : : BlockNumber blkno_inseg;
296 : : int segno;
297 : :
3310 heikki.linnakangas@i 298 : 84483 : segno = blkno / RELSEG_SIZE;
299 : 84483 : blkno_inseg = blkno % RELSEG_SIZE;
300 : :
648 rhaas@postgresql.org 301 : 84483 : path = datasegpath(rlocator, forknum, segno);
1257 heikki.linnakangas@i 302 : 84483 : entry = lookup_filehash_entry(path);
3230 fujii@postgresql.org 303 : 84483 : pfree(path);
304 : :
305 : : /*
306 : : * If the block still exists in both systems, remember it. Otherwise we
307 : : * can safely ignore it.
308 : : *
309 : : * If the block is beyond the EOF in the source system, or the file
310 : : * doesn't exist in the source at all, we're going to truncate/remove it
311 : : * away from the target anyway. Likewise, if it doesn't exist in the
312 : : * target anymore, we will copy it over with the "tail" from the source
313 : : * system, anyway.
314 : : *
315 : : * It is possible to find WAL for a file that doesn't exist on either
316 : : * system anymore. It means that the relation was dropped later in the
317 : : * target system, and independently on the source system too, or that it
318 : : * was created and dropped in the target system and it never existed in
319 : : * the source. Either way, we can safely ignore it.
320 : : */
3310 heikki.linnakangas@i 321 [ + - ]: 84483 : if (entry)
322 : : {
1257 323 [ - + ]: 84483 : Assert(entry->isrelfile);
324 : :
1251 325 [ + + ]: 84483 : if (entry->target_exists)
326 : : {
327 [ - + ]: 84479 : if (entry->target_type != FILE_TYPE_REGULAR)
1251 heikki.linnakangas@i 328 :UBC 0 : pg_fatal("unexpected page modification for non-regular file \"%s\"",
329 : : entry->path);
330 : :
1251 heikki.linnakangas@i 331 [ + + ]:CBC 84479 : if (entry->source_exists)
332 : : {
333 : : off_t end_offset;
334 : :
335 : 82835 : end_offset = (blkno_inseg + 1) * BLCKSZ;
336 [ + + + + ]: 82835 : if (end_offset <= entry->source_size && end_offset <= entry->target_size)
337 : 2835 : datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
338 : : }
339 : : }
340 : : }
3310 341 : 84483 : }
342 : :
343 : : /*
344 : : * Is this the path of file that pg_rewind can skip copying?
345 : : */
346 : : static bool
2040 peter_e@gmx.net 347 : 15599 : check_file_excluded(const char *path, bool is_source)
348 : : {
349 : : char localpath[MAXPGPATH];
350 : : int excludeIdx;
351 : : const char *filename;
352 : :
353 : : /*
354 : : * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
355 : : */
1257 heikki.linnakangas@i 356 [ + + ]: 15599 : if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
357 [ - + ]: 15586 : strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
358 : : {
359 : 13 : return true;
360 : : }
361 : :
362 : : /* check individual files... */
1511 michael@paquier.xyz 363 [ + + ]: 140006 : for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
364 : : {
365 : 124486 : int cmplen = strlen(excludeFiles[excludeIdx].name);
366 : :
2208 fujii@postgresql.org 367 : 124486 : filename = last_dir_separator(path);
368 [ + + ]: 124486 : if (filename == NULL)
369 : 2696 : filename = path;
370 : : else
371 : 121790 : filename++;
372 : :
1511 michael@paquier.xyz 373 [ + + ]: 124486 : if (!excludeFiles[excludeIdx].match_prefix)
374 : 108900 : cmplen++;
375 [ + + ]: 124486 : if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
376 : : {
2040 peter_e@gmx.net 377 [ + - ]: 66 : if (is_source)
1840 peter@eisentraut.org 378 [ + - ]: 66 : pg_log_debug("entry \"%s\" excluded from source file list",
379 : : path);
380 : : else
1840 peter@eisentraut.org 381 [ # # ]:UBC 0 : pg_log_debug("entry \"%s\" excluded from target file list",
382 : : path);
2208 fujii@postgresql.org 383 :CBC 66 : return true;
384 : : }
385 : : }
386 : :
387 : : /*
388 : : * ... And check some directories. Note that this includes any contents
389 : : * within the directories themselves.
390 : : */
391 [ + + ]: 124147 : for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
392 : : {
393 : 108640 : snprintf(localpath, sizeof(localpath), "%s/",
2208 fujii@postgresql.org 394 :GIC 108640 : excludeDirContents[excludeIdx]);
2208 fujii@postgresql.org 395 [ + + ]:CBC 108640 : if (strstr(path, localpath) == path)
396 : : {
2040 peter_e@gmx.net 397 [ + - ]: 13 : if (is_source)
1840 peter@eisentraut.org 398 [ + - ]: 13 : pg_log_debug("entry \"%s\" excluded from source file list",
399 : : path);
400 : : else
1840 peter@eisentraut.org 401 [ # # ]:UBC 0 : pg_log_debug("entry \"%s\" excluded from target file list",
402 : : path);
2208 fujii@postgresql.org 403 :CBC 13 : return true;
404 : : }
405 : : }
406 : :
407 : 15507 : return false;
408 : : }
409 : :
410 : : static const char *
3310 heikki.linnakangas@i 411 : 5301 : action_to_str(file_action_t action)
412 : : {
413 [ + + + + : 5301 : switch (action)
+ + - ]
414 : : {
415 : 398 : case FILE_ACTION_NONE:
416 : 398 : return "NONE";
417 : 4177 : case FILE_ACTION_COPY:
418 : 4177 : return "COPY";
419 : 4 : case FILE_ACTION_TRUNCATE:
420 : 4 : return "TRUNCATE";
421 : 5 : case FILE_ACTION_COPY_TAIL:
422 : 5 : return "COPY_TAIL";
423 : 9 : case FILE_ACTION_CREATE:
424 : 9 : return "CREATE";
425 : 708 : case FILE_ACTION_REMOVE:
426 : 708 : return "REMOVE";
427 : :
3310 heikki.linnakangas@i 428 :UBC 0 : default:
429 : 0 : return "unknown";
430 : : }
431 : : }
432 : :
433 : : /*
434 : : * Calculate the totals needed for progress reports.
435 : : */
436 : : void
1257 437 : 0 : calculate_totals(filemap_t *filemap)
438 : : {
439 : : file_entry_t *entry;
440 : : int i;
441 : :
442 : 0 : filemap->total_size = 0;
443 : 0 : filemap->fetch_size = 0;
444 : :
445 [ # # ]: 0 : for (i = 0; i < filemap->nentries; i++)
446 : : {
447 : 0 : entry = filemap->entries[i];
448 : :
449 [ # # ]: 0 : if (entry->source_type != FILE_TYPE_REGULAR)
3310 450 : 0 : continue;
451 : :
1257 452 : 0 : filemap->total_size += entry->source_size;
453 : :
3310 454 [ # # ]: 0 : if (entry->action == FILE_ACTION_COPY)
455 : : {
1257 456 : 0 : filemap->fetch_size += entry->source_size;
3310 457 : 0 : continue;
458 : : }
459 : :
460 [ # # ]: 0 : if (entry->action == FILE_ACTION_COPY_TAIL)
1257 461 : 0 : filemap->fetch_size += (entry->source_size - entry->target_size);
462 : :
463 [ # # ]: 0 : if (entry->target_pages_to_overwrite.bitmapsize > 0)
464 : : {
465 : : datapagemap_iterator_t *iter;
466 : : BlockNumber blk;
467 : :
468 : 0 : iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
3310 469 [ # # ]: 0 : while (datapagemap_next(iter, &blk))
1257 470 : 0 : filemap->fetch_size += BLCKSZ;
471 : :
3310 472 : 0 : pg_free(iter);
473 : : }
474 : : }
475 : 0 : }
476 : :
477 : : void
1257 heikki.linnakangas@i 478 :CBC 13 : print_filemap(filemap_t *filemap)
479 : : {
480 : : file_entry_t *entry;
481 : : int i;
482 : :
483 [ + + ]: 15627 : for (i = 0; i < filemap->nentries; i++)
484 : : {
485 : 15614 : entry = filemap->entries[i];
3310 486 [ + + ]: 15614 : if (entry->action != FILE_ACTION_NONE ||
1257 487 [ + + ]: 10711 : entry->target_pages_to_overwrite.bitmapsize > 0)
488 : : {
1840 peter@eisentraut.org 489 [ + - ]: 5301 : pg_log_debug("%s (%s)", entry->path,
490 : : action_to_str(entry->action));
491 : :
1257 heikki.linnakangas@i 492 [ + + ]: 5301 : if (entry->target_pages_to_overwrite.bitmapsize > 0)
493 : 406 : datapagemap_print(&entry->target_pages_to_overwrite);
494 : : }
495 : : }
3310 496 : 13 : fflush(stdout);
497 : 13 : }
498 : :
499 : : /*
500 : : * Does it look like a relation data file?
501 : : *
502 : : * For our purposes, only files belonging to the main fork are considered
503 : : * relation files. Other forks are always copied in toto, because we cannot
504 : : * reliably track changes to them, because WAL only contains block references
505 : : * for the main fork.
506 : : */
507 : : static bool
508 : 15981 : isRelDataFile(const char *path)
509 : : {
510 : : RelFileLocator rlocator;
511 : : unsigned int segNo;
512 : : int nmatch;
513 : : bool matched;
514 : :
515 : : /*----
516 : : * Relation data files can be in one of the following directories:
517 : : *
518 : : * global/
519 : : * shared relations
520 : : *
521 : : * base/<db oid>/
522 : : * regular relations, default tablespace
523 : : *
524 : : * pg_tblspc/<tblspc oid>/<tblspc version>/
525 : : * within a non-default tablespace (the name of the directory
526 : : * depends on version)
527 : : *
528 : : * And the relation data files themselves have a filename like:
529 : : *
530 : : * <oid>.<segment number>
531 : : *
532 : : *----
533 : : */
648 rhaas@postgresql.org 534 : 15981 : rlocator.spcOid = InvalidOid;
535 : 15981 : rlocator.dbOid = InvalidOid;
536 : 15981 : rlocator.relNumber = InvalidRelFileNumber;
3310 heikki.linnakangas@i 537 : 15981 : segNo = 0;
538 : 15981 : matched = false;
539 : :
564 rhaas@postgresql.org 540 : 15981 : nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
3310 heikki.linnakangas@i 541 [ + + - + ]: 15981 : if (nmatch == 1 || nmatch == 2)
542 : : {
648 rhaas@postgresql.org 543 : 780 : rlocator.spcOid = GLOBALTABLESPACE_OID;
544 : 780 : rlocator.dbOid = 0;
3310 heikki.linnakangas@i 545 : 780 : matched = true;
546 : : }
547 : : else
548 : : {
564 rhaas@postgresql.org 549 : 15201 : nmatch = sscanf(path, "base/%u/%u.%u",
550 : : &rlocator.dbOid, &rlocator.relNumber, &segNo);
3310 heikki.linnakangas@i 551 [ + + - + ]: 15201 : if (nmatch == 2 || nmatch == 3)
552 : : {
648 rhaas@postgresql.org 553 : 13998 : rlocator.spcOid = DEFAULTTABLESPACE_OID;
3310 heikki.linnakangas@i 554 : 13998 : matched = true;
555 : : }
556 : : else
557 : : {
564 rhaas@postgresql.org 558 : 1203 : nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
559 : : &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
560 : : &segNo);
2231 fujii@postgresql.org 561 [ + + - + ]: 1203 : if (nmatch == 3 || nmatch == 4)
3310 heikki.linnakangas@i 562 :GBC 12 : matched = true;
563 : : }
564 : : }
565 : :
566 : : /*
567 : : * The sscanf tests above can match files that have extra characters at
568 : : * the end. To eliminate such cases, cross-check that GetRelationPath
569 : : * creates the exact same filename, when passed the RelFileLocator
570 : : * information we extracted from the filename.
571 : : */
3310 heikki.linnakangas@i 572 [ + + ]:CBC 15981 : if (matched)
573 : : {
648 rhaas@postgresql.org 574 : 14790 : char *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo);
575 : :
3310 heikki.linnakangas@i 576 [ + + ]: 14790 : if (strcmp(check_path, path) != 0)
577 : 3620 : matched = false;
578 : :
579 : 14790 : pfree(check_path);
580 : : }
581 : :
582 : 15981 : return matched;
583 : : }
584 : :
585 : : /*
586 : : * A helper function to create the path of a relation file and segment.
587 : : *
588 : : * The returned path is palloc'd
589 : : */
590 : : static char *
648 rhaas@postgresql.org 591 : 99273 : datasegpath(RelFileLocator rlocator, ForkNumber forknum, BlockNumber segno)
592 : : {
593 : : char *path;
594 : : char *segpath;
595 : :
596 : 99273 : path = relpathperm(rlocator, forknum);
3310 heikki.linnakangas@i 597 [ - + ]: 99273 : if (segno > 0)
598 : : {
3310 heikki.linnakangas@i 599 :UBC 0 : segpath = psprintf("%s.%u", path, segno);
600 : 0 : pfree(path);
601 : 0 : return segpath;
602 : : }
603 : : else
3310 heikki.linnakangas@i 604 :CBC 99273 : return path;
605 : : }
606 : :
607 : : /*
608 : : * In the final stage, the filemap is sorted so that removals come last.
609 : : * From disk space usage point of view, it would be better to do removals
610 : : * first, but for now, safety first. If a whole directory is deleted, all
611 : : * files and subdirectories inside it need to removed first. On creation,
612 : : * parent directory needs to be created before files and directories inside
613 : : * it. To achieve that, the file_action_t enum is ordered so that we can
614 : : * just sort on that first. Furthermore, sort REMOVE entries in reverse
615 : : * path order, so that "foo/bar" subdirectory is removed before "foo".
616 : : */
617 : : static int
618 : 165589 : final_filemap_cmp(const void *a, const void *b)
619 : : {
620 : 165589 : file_entry_t *fa = *((file_entry_t **) a);
621 : 165589 : file_entry_t *fb = *((file_entry_t **) b);
622 : :
623 [ + + ]: 165589 : if (fa->action > fb->action)
624 : 5251 : return 1;
625 [ + + ]: 160338 : if (fa->action < fb->action)
626 : 9819 : return -1;
627 : :
628 [ + + ]: 150519 : if (fa->action == FILE_ACTION_REMOVE)
2018 tgl@sss.pgh.pa.us 629 : 5580 : return strcmp(fb->path, fa->path);
630 : : else
3310 heikki.linnakangas@i 631 : 144939 : return strcmp(fa->path, fb->path);
632 : : }
633 : :
634 : : /*
635 : : * Decide what action to perform to a file.
636 : : */
637 : : static file_action_t
1257 638 : 15614 : decide_file_action(file_entry_t *entry)
639 : : {
640 : 15614 : const char *path = entry->path;
641 : :
642 : : /*
643 : : * Don't touch the control file. It is handled specially, after copying
644 : : * all the other files.
645 : : */
646 [ + + ]: 15614 : if (strcmp(path, "global/pg_control") == 0)
647 : 13 : return FILE_ACTION_NONE;
648 : :
649 : : /* Skip macOS system files */
61 dgustafsson@postgres 650 [ + + ]: 15601 : if (strstr(path, ".DS_Store") != NULL)
651 : 2 : return FILE_ACTION_NONE;
652 : :
653 : : /*
654 : : * Remove all files matching the exclusion filters in the target.
655 : : */
1257 heikki.linnakangas@i 656 [ + + ]: 15599 : if (check_file_excluded(path, true))
657 : : {
658 [ + + ]: 92 : if (entry->target_exists)
659 : 65 : return FILE_ACTION_REMOVE;
660 : : else
661 : 27 : return FILE_ACTION_NONE;
662 : : }
663 : :
664 : : /*
665 : : * Handle cases where the file is missing from one of the systems.
666 : : */
667 [ + + + - ]: 15507 : if (!entry->target_exists && entry->source_exists)
668 : : {
669 : : /*
670 : : * File exists in source, but not in target. Copy it in toto. (If it's
671 : : * a relation data file, WAL replay after rewinding should re-create
672 : : * it anyway. But there's no harm in copying it now.)
673 : : */
674 [ + + - - ]: 669 : switch (entry->source_type)
675 : : {
676 : 9 : case FILE_TYPE_DIRECTORY:
677 : : case FILE_TYPE_SYMLINK:
678 : 9 : return FILE_ACTION_CREATE;
679 : 660 : case FILE_TYPE_REGULAR:
680 : 660 : return FILE_ACTION_COPY;
1257 heikki.linnakangas@i 681 :UBC 0 : case FILE_TYPE_UNDEFINED:
682 : 0 : pg_fatal("unknown file type for \"%s\"", entry->path);
683 : : break;
684 : : }
685 : : }
1257 heikki.linnakangas@i 686 [ + - + + ]:CBC 14838 : else if (entry->target_exists && !entry->source_exists)
687 : : {
688 : : /* File exists in target, but not source. Remove it. */
689 : 643 : return FILE_ACTION_REMOVE;
690 : : }
691 [ - + - - ]: 14195 : else if (!entry->target_exists && !entry->source_exists)
692 : : {
693 : : /*
694 : : * Doesn't exist in either server. Why does it have an entry in the
695 : : * first place??
696 : : */
1257 heikki.linnakangas@i 697 :UBC 0 : Assert(false);
698 : : return FILE_ACTION_NONE;
699 : : }
700 : :
701 : : /*
702 : : * Otherwise, the file exists on both systems
703 : : */
1257 heikki.linnakangas@i 704 [ + - + - ]:CBC 14195 : Assert(entry->target_exists && entry->source_exists);
705 : :
706 [ - + ]: 14195 : if (entry->source_type != entry->target_type)
707 : : {
708 : : /* But it's a different kind of object. Strange.. */
1257 heikki.linnakangas@i 709 :UBC 0 : pg_fatal("file \"%s\" is of different type in source and target", entry->path);
710 : : }
711 : :
712 : : /*
713 : : * PG_VERSION files should be identical on both systems, but avoid
714 : : * overwriting them for paranoia.
715 : : */
1257 heikki.linnakangas@i 716 [ + + ]:CBC 14195 : if (pg_str_endswith(entry->path, "PG_VERSION"))
717 : 56 : return FILE_ACTION_NONE;
718 : :
719 [ + - + - : 14139 : switch (entry->source_type)
- ]
720 : : {
721 : 356 : case FILE_TYPE_DIRECTORY:
722 : 356 : return FILE_ACTION_NONE;
723 : :
1257 heikki.linnakangas@i 724 :UBC 0 : case FILE_TYPE_SYMLINK:
725 : :
726 : : /*
727 : : * XXX: Should we check if it points to the same target?
728 : : */
729 : 0 : return FILE_ACTION_NONE;
730 : :
1257 heikki.linnakangas@i 731 :CBC 13783 : case FILE_TYPE_REGULAR:
732 [ + + ]: 13783 : if (!entry->isrelfile)
733 : : {
734 : : /*
735 : : * It's a non-data file that we have no special processing
736 : : * for. Copy it in toto.
737 : : */
738 : 3517 : return FILE_ACTION_COPY;
739 : : }
740 : : else
741 : : {
742 : : /*
743 : : * It's a data file that exists in both systems.
744 : : *
745 : : * If it's larger in target, we can truncate it. There will
746 : : * also be a WAL record of the truncation in the source
747 : : * system, so WAL replay would eventually truncate the target
748 : : * too, but we might as well do it now.
749 : : *
750 : : * If it's smaller in the target, it means that it has been
751 : : * truncated in the target, or enlarged in the source, or
752 : : * both. If it was truncated in the target, we need to copy
753 : : * the missing tail from the source system. If it was enlarged
754 : : * in the source system, there will be WAL records in the
755 : : * source system for the new blocks, so we wouldn't need to
756 : : * copy them here. But we don't know which scenario we're
757 : : * dealing with, and there's no harm in copying the missing
758 : : * blocks now, so do it now.
759 : : *
760 : : * If it's the same size, do nothing here. Any blocks modified
761 : : * in the target will be copied based on parsing the target
762 : : * system's WAL, and any blocks modified in the source will be
763 : : * updated after rewinding, when the source system's WAL is
764 : : * replayed.
765 : : */
766 [ + + ]: 10266 : if (entry->target_size < entry->source_size)
767 : 5 : return FILE_ACTION_COPY_TAIL;
768 [ + + ]: 10261 : else if (entry->target_size > entry->source_size)
769 : 4 : return FILE_ACTION_TRUNCATE;
770 : : else
771 : 10257 : return FILE_ACTION_NONE;
772 : : }
773 : : break;
774 : :
1257 heikki.linnakangas@i 775 :UBC 0 : case FILE_TYPE_UNDEFINED:
776 : 0 : pg_fatal("unknown file type for \"%s\"", path);
777 : : break;
778 : : }
779 : :
780 : : /* unreachable */
781 : 0 : pg_fatal("could not decide what to do with file \"%s\"", path);
782 : : }
783 : :
784 : : /*
785 : : * Decide what to do with each file.
786 : : *
787 : : * Returns a 'filemap' with the entries in the order that their actions
788 : : * should be executed.
789 : : */
790 : : filemap_t *
1257 heikki.linnakangas@i 791 :CBC 13 : decide_file_actions(void)
792 : : {
793 : : int i;
794 : : filehash_iterator it;
795 : : file_entry_t *entry;
796 : : filemap_t *filemap;
797 : :
798 : 13 : filehash_start_iterate(filehash, &it);
799 [ + + ]: 15627 : while ((entry = filehash_iterate(filehash, &it)) != NULL)
800 : : {
801 : 15614 : entry->action = decide_file_action(entry);
802 : : }
803 : :
804 : : /*
805 : : * Turn the hash table into an array, and sort in the order that the
806 : : * actions should be performed.
807 : : */
808 : 13 : filemap = pg_malloc(offsetof(filemap_t, entries) +
809 : 13 : filehash->members * sizeof(file_entry_t *));
810 : 13 : filemap->nentries = filehash->members;
811 : 13 : filehash_start_iterate(filehash, &it);
812 : 13 : i = 0;
813 [ + + ]: 15627 : while ((entry = filehash_iterate(filehash, &it)) != NULL)
814 : : {
815 : 15614 : filemap->entries[i++] = entry;
816 : : }
817 : :
818 : 13 : qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
819 : : final_filemap_cmp);
820 : :
821 : 13 : return filemap;
822 : : }
|