LCOV - differential code coverage report
Current view: top level - src/bin/pg_rewind - filemap.c (source / functions) Coverage Total Hit UBC GNC CBC DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 82.2 % 230 189 41 12 177 12
Current Date: 2023-04-08 15:15:32 Functions: 93.8 % 16 15 1 3 12 2
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * filemap.c
       4                 :  *    A data structure for keeping track of files that have changed.
       5                 :  *
       6                 :  * This source file contains the logic to decide what to do with different
       7                 :  * kinds of files, and the data structure to support it.  Before modifying
       8                 :  * anything, pg_rewind collects information about all the files and their
       9                 :  * attributes in the target and source data directories.  It also scans the
      10                 :  * WAL log in the target, and collects information about data blocks that
      11                 :  * were changed.  All this information is stored in a hash table, using the
      12                 :  * file path relative to the root of the data directory as the key.
      13                 :  *
      14                 :  * After collecting all the information required, the decide_file_actions()
      15                 :  * function scans the hash table and decides what action needs to be taken
      16                 :  * for each file.  Finally, it sorts the array to the final order that the
      17                 :  * actions should be executed in.
      18                 :  *
      19                 :  * Copyright (c) 2013-2023, PostgreSQL Global Development Group
      20                 :  *
      21                 :  *-------------------------------------------------------------------------
      22                 :  */
      23                 : 
      24                 : #include "postgres_fe.h"
      25                 : 
      26                 : #include <sys/stat.h>
      27                 : #include <unistd.h>
      28                 : 
      29                 : #include "catalog/pg_tablespace_d.h"
      30                 : #include "common/hashfn.h"
      31                 : #include "common/string.h"
      32                 : #include "datapagemap.h"
      33                 : #include "filemap.h"
      34                 : #include "pg_rewind.h"
      35                 : #include "storage/fd.h"
      36                 : 
      37                 : /*
      38                 :  * Define a hash table, keyed by the file's path string, which we use to
      39                 :  * store information about the files appearing in source and target systems.
      40                 :  */
      41                 : static uint32 hash_string_pointer(const char *s);
      42                 : #define SH_PREFIX       filehash
      43                 : #define SH_ELEMENT_TYPE file_entry_t
      44                 : #define SH_KEY_TYPE     const char *
      45                 : #define SH_KEY          path
      46                 : #define SH_HASH_KEY(tb, key)    hash_string_pointer(key)
      47                 : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
      48                 : #define SH_SCOPE        static inline
      49                 : #define SH_RAW_ALLOCATOR    pg_malloc0
      50                 : #define SH_DECLARE
      51                 : #define SH_DEFINE
      52                 : #include "lib/simplehash.h"
      53                 : /* Initial size handed to filehash_create() by filehash_init() */
      54                 : #define FILEHASH_INITIAL_SIZE   1000
      55                 : /* The hash table itself, set up by filehash_init() */
      56                 : static filehash_hash *filehash;
      57                 : /* Prototypes for the file-local helper functions */
      58                 : static bool isRelDataFile(const char *path);
      59                 : static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
      60                 :                          BlockNumber segno);
      61                 : 
      62                 : static file_entry_t *insert_filehash_entry(const char *path);
      63                 : static file_entry_t *lookup_filehash_entry(const char *path);
      64                 : static int  final_filemap_cmp(const void *a, const void *b);
      65                 : static bool check_file_excluded(const char *path, bool is_source);
      66                 : 
      67                 : /*
      68                 :  * One element of an exclusion list, used to exclude contents when
      69                 :  * rewinding.  "name" is compared against the last component of a path.
      70                 :  * If "match_prefix" is true, any file name starting with "name" is
      71                 :  * excluded; otherwise the file name has to equal "name" exactly.
      72                 :  */
      73                 : struct exclude_list_item
      74                 : {
      75                 :     const char *name;
      76                 :     bool        match_prefix;
      77                 : };
      78                 : 
      79                 : /*
      80                 :  * The contents of these directories are removed or recreated during server
      81                 :  * start so they are not included in data processed by pg_rewind.
      82                 :  *
      83                 :  * Note: these lists should be kept in sync with what basebackup.c provides.
      84                 :  * Some of the values, contrary to what basebackup.c uses, are hardcoded as
      85                 :  * they are defined in backend-only headers.  So this list is maintained
      86                 :  * with a best effort in mind.
      87                 :  */
      88                 : static const char *excludeDirContents[] =
      89                 : {
      90                 :     /*
      91                 :      * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
      92                 :      * because extensions like pg_stat_statements store data there.
      93                 :      */
      94                 :     "pg_stat_tmp",                /* defined as PG_STAT_TMP_DIR */
      95                 : 
      96                 :     /*
      97                 :      * It is generally not useful to back up the contents of this directory
      98                 :      * even if the intention is to restore to another primary. See backup.sgml
      99                 :      * for a more detailed description.
     100                 :      */
     101                 :     "pg_replslot",
     102                 : 
     103                 :     /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
     104                 :     "pg_dynshmem",                /* defined as PG_DYNSHMEM_DIR */
     105                 : 
     106                 :     /* Contents removed on startup, see AsyncShmemInit(). */
     107                 :     "pg_notify",
     108                 : 
     109                 :     /*
     110                 :      * Old contents are loaded for possible debugging but are not required for
     111                 :      * normal operation, see SerialInit().
     112                 :      */
     113                 :     "pg_serial",
     114                 : 
     115                 :     /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
     116                 :     "pg_snapshots",
     117                 : 
     118                 :     /* Contents zeroed on startup, see StartupSUBTRANS(). */
     119                 :     "pg_subtrans",
     120                 : 
     121                 :     /* end of list */
     122                 :     NULL
     123                 : };
     124                 : 
     125                 : /*
     126                 :  * List of files excluded from filemap processing.  An entry matches a file
     127                 :  * name by prefix if its match_prefix is true, and only exactly otherwise.
     128                 :  */
     129                 : static const struct exclude_list_item excludeFiles[] =
     130                 : {
     131                 :     /* Skip auto conf temporary file. */
     132                 :     {"postgresql.auto.conf.tmp", false},  /* defined as PG_AUTOCONF_FILENAME */
     133                 : 
     134                 :     /* Skip current log file temporary file */
     135                 :     {"current_logfiles.tmp", false},  /* defined as
     136                 :                                          * LOG_METAINFO_DATAFILE_TMP */
     137                 : 
     138                 :     /* Skip relation cache because it is rebuilt on startup */
     139                 :     {"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
     140                 : 
     141                 :     /*
     142                 :      * If there is a backup_label or tablespace_map file, it indicates that a
     143                 :      * recovery failed and this cluster probably can't be rewound, but exclude
     144                 :      * them anyway if they are found.
     145                 :      */
     146                 :     {"backup_label", false},  /* defined as BACKUP_LABEL_FILE */
     147                 :     {"tablespace_map", false},    /* defined as TABLESPACE_MAP */
     148                 : 
     149                 :     /*
     150                 :      * If there's a backup_manifest, it belongs to a backup that was used to
     151                 :      * start this server. It is *not* correct for this backup. Our
     152                 :      * backup_manifest is injected into the backup separately if users want
     153                 :      * it.
     154                 :      */
     155                 :     {"backup_manifest", false},
     156                 : 
     157                 :     {"postmaster.pid", false},
     158                 :     {"postmaster.opts", false},
     159                 : 
     160                 :     /* end of list */
     161                 :     {NULL, false}
     162                 : };
     163                 : 
     164                 : /*
     165                 :  * Create the file map's hash table, initially sized FILEHASH_INITIAL_SIZE.
     166                 :  */
     167                 : void
     168 CBC          13 : filehash_init(void)
     169                 : {
     170              13 :     filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
     171              13 : }
     172                 : 
     173                 : /* Return the entry for 'path', creating and initializing a new one if none exists */
     174                 : static file_entry_t *
     175           29791 : insert_filehash_entry(const char *path)
     176                 : {
     177                 :     file_entry_t *entry;
     178                 :     bool        found;
     179                 : 
     180           29791 :     entry = filehash_insert(filehash, path, &found);
     181           29791 :     if (!found)
     182                 :     {
     183           15576 :         entry->path = pg_strdup(path);
     184           15576 :         entry->isrelfile = isRelDataFile(path);
     185                 :         /* nothing is known about the target side yet */
     186           15576 :         entry->target_exists = false;
     187           15576 :         entry->target_type = FILE_TYPE_UNDEFINED;
     188           15576 :         entry->target_size = 0;
     189           15576 :         entry->target_link_target = NULL;
     190           15576 :         entry->target_pages_to_overwrite.bitmap = NULL;
     191           15576 :         entry->target_pages_to_overwrite.bitmapsize = 0;
     192                 :         /* ... nor about the source side */
     193           15576 :         entry->source_exists = false;
     194           15576 :         entry->source_type = FILE_TYPE_UNDEFINED;
     195           15576 :         entry->source_size = 0;
     196           15576 :         entry->source_link_target = NULL;
     197                 :         /* the action for this file has not been chosen yet */
     198           15576 :         entry->action = FILE_ACTION_UNDECIDED;
     199                 :     }
     200                 : 
     201           29791 :     return entry;
     202                 : }
     203                 : /* Return what filehash_lookup() finds for 'path'; the caller in this file tests the result for NULL */
     204                 : static file_entry_t *
     205           84782 : lookup_filehash_entry(const char *path)
     206                 : {
     207           84782 :     return filehash_lookup(filehash, path);
     208                 : }
     209                 : 
     210                 : /*
     211                 :  * Callback for processing source file list.
     212                 :  *
     213                 :  * Called once for every file in the source server.  The type, size and a
     214                 :  * copy of the link target are recorded, so that decide_file_action() can
     215                 :  * later decide what to do with it.  A repeated path ends in pg_fatal().
     216                 :  */
     217                 : void
     218           14912 : process_source_file(const char *path, file_type_t type, size_t size,
     219                 :                     const char *link_target)
     220                 : {
     221                 :     file_entry_t *entry;
     222                 : 
     223                 :     /*
     224                 :      * Pretend that pg_wal is a directory, even if it's really a symlink. We
     225                 :      * don't want to mess with the symlink itself, nor complain if it's a
     226                 :      * symlink in source but not in target or vice versa.
     227                 :      */
     228           14912 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     229 UBC           0 :         type = FILE_TYPE_DIRECTORY;
     230                 : 
     231                 :     /*
     232                 :      * sanity check: a filename that looks like a data file better be a
     233                 :      * regular file
     234                 :      */
     235 CBC       14912 :     if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
     236 UBC           0 :         pg_fatal("data file \"%s\" in source is not a regular file", path);
     237                 : 
     238                 :     /* Remember this source file */
     239 CBC       14912 :     entry = insert_filehash_entry(path);
     240           14912 :     if (entry->source_exists)
     241 UBC           0 :         pg_fatal("duplicate source file \"%s\"", path);
     242 CBC       14912 :     entry->source_exists = true;
     243           14912 :     entry->source_type = type;
     244           14912 :     entry->source_size = size;
     245           14912 :     entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
     246           14912 : }
     247                 : 
     248                 : /*
     249                 :  * Callback for processing target file list.
     250                 :  *
     251                 :  * Record the type, size and link target of the file, like process_source_file() does.
     252                 :  */
     253                 : void
     254           14879 : process_target_file(const char *path, file_type_t type, size_t size,
     255                 :                     const char *link_target)
     256                 : {
     257                 :     file_entry_t *entry;
     258                 : 
     259                 :     /*
     260                 :      * Do not apply any exclusion filters here.  This has the advantage of
     261                 :      * removing from the target data folder all paths which have been filtered
     262                 :      * out from the source data folder when processing the source files.
     263                 :      */
     264                 : 
     265                 :     /*
     266                 :      * Like in process_source_file, pretend that pg_wal is always a directory.
     267                 :      */
     268           14879 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
     269               2 :         type = FILE_TYPE_DIRECTORY;
     270                 : 
     271                 :     /* Remember this target file; seeing the same target path twice is an error */
     272           14879 :     entry = insert_filehash_entry(path);
     273           14879 :     if (entry->target_exists)
     274 UBC           0 :         pg_fatal("duplicate target file \"%s\"", path);
     275 CBC       14879 :     entry->target_exists = true;
     276           14879 :     entry->target_type = type;
     277           14879 :     entry->target_size = size;
     278           14879 :     entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
     279           14879 : }
     280                 : 
     281                 : /*
     282                 :  * This callback gets called while we read the WAL in the target, for every
     283                 :  * block that has changed in the target system.  It decides if the given
     284                 :  * 'blkno' in the target relfile needs to be overwritten from the source, and
     285                 :  * if so, records it in 'target_pages_to_overwrite' bitmap.
     286                 :  *
     287                 :  * NOTE: All the files on both systems must have already been added to the
     288                 :  * hash table!
     289                 :  */
     290                 : void
     291 GNC       84782 : process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
     292                 :                                 BlockNumber blkno)
     293                 : {
     294                 :     char       *path;
     295                 :     file_entry_t *entry;
     296                 :     BlockNumber blkno_inseg;
     297                 :     int         segno;
     298                 :     /* split the block number into a segment number and a block within it */
     299 CBC       84782 :     segno = blkno / RELSEG_SIZE;
     300           84782 :     blkno_inseg = blkno % RELSEG_SIZE;
     301                 : 
     302 GNC       84782 :     path = datasegpath(rlocator, forknum, segno);
     303 CBC       84782 :     entry = lookup_filehash_entry(path);
     304           84782 :     pfree(path);
     305                 : 
     306                 :     /*
     307                 :      * If the block still exists in both systems, remember it. Otherwise we
     308                 :      * can safely ignore it.
     309                 :      *
     310                 :      * If the block is beyond the EOF in the source system, or the file
     311                 :      * doesn't exist in the source at all, we're going to truncate/remove it
     312                 :      * away from the target anyway. Likewise, if it doesn't exist in the
     313                 :      * target anymore, we will copy it over with the "tail" from the source
     314                 :      * system, anyway.
     315                 :      *
     316                 :      * It is possible to find WAL for a file that doesn't exist on either
     317                 :      * system anymore. It means that the relation was dropped later in the
     318                 :      * target system, and independently on the source system too, or that it
     319                 :      * was created and dropped in the target system and it never existed in
     320                 :      * the source. Either way, we can safely ignore it.
     321                 :      */
     322           84782 :     if (entry)
     323                 :     {
     324           84782 :         Assert(entry->isrelfile);
     325                 : 
     326           84782 :         if (entry->target_exists)
     327                 :         {
     328           84778 :             if (entry->target_type != FILE_TYPE_REGULAR)
     329 UBC           0 :                 pg_fatal("unexpected page modification for non-regular file \"%s\"",
     330                 :                          entry->path);
     331                 : 
     332 CBC       84778 :             if (entry->source_exists)
     333                 :             {
     334                 :                 off_t       end_offset;
     335                 :                 /* widen first: 32-bit arithmetic wraps for segments of 4GB or more */
     336           83132 :                 end_offset = ((off_t) blkno_inseg + 1) * BLCKSZ;
     337           83132 :                 if (end_offset <= entry->source_size && end_offset <= entry->target_size)
     338            2800 :                     datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
     339                 :             }
     340                 :         }
     341                 :     }
     342           84782 : }
     343                 : 
     344                 : /*
     345                 :  * Is this the path of a file that pg_rewind can skip copying?
     346                 :  */
     347                 : static bool
     348           15563 : check_file_excluded(const char *path, bool is_source)
     349                 : {
     350                 :     char        localpath[MAXPGPATH];
     351                 :     int         excludeIdx;
     352                 :     const char *filename;
     353                 :     /* is_source only selects the wording of the debug messages below */
     354                 :     /*
     355                 :      * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
     356                 :      */
     357           15563 :     if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
     358           15550 :         strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
     359                 :     {
     360              13 :         return true;
     361                 :     }
     362                 : 
     363                 :     /* check individual files... */
     364          139682 :     for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
     365                 :     {
     366          124198 :         int         cmplen = strlen(excludeFiles[excludeIdx].name);
     367                 :         /* only the last component of the path is compared */
     368          124198 :         filename = last_dir_separator(path);
     369          124198 :         if (filename == NULL)
     370            2696 :             filename = path;
     371                 :         else
     372          121502 :             filename++;
     373                 :         /* for an exact match, compare the terminating NUL as well */
     374          124198 :         if (!excludeFiles[excludeIdx].match_prefix)
     375          108648 :             cmplen++;
     376          124198 :         if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
     377                 :         {
     378              66 :             if (is_source)
     379              66 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     380                 :                              path);
     381                 :             else
     382 UBC           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     383                 :                              path);
     384 CBC          66 :             return true;
     385                 :         }
     386                 :     }
     387                 : 
     388                 :     /*
     389                 :      * ... And check some directories.  Note that this includes any contents
     390                 :      * within the directories themselves.
     391                 :      */
     392          123859 :     for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
     393                 :     {
     394          108388 :         snprintf(localpath, sizeof(localpath), "%s/",
     395                 :                  excludeDirContents[excludeIdx]);
     396          108388 :         if (strstr(path, localpath) == path)
     397                 :         {
     398              13 :             if (is_source)
     399              13 :                 pg_log_debug("entry \"%s\" excluded from source file list",
     400                 :                              path);
     401                 :             else
     402 UBC           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
     403                 :                              path);
     404 CBC          13 :             return true;
     405                 :         }
     406                 :     }
     407                 : 
     408           15471 :     return false;
     409                 : }
     410                 : /* Map a file_action_t value to its name for log output; unlisted values give "unknown" */
     411                 : static const char *
     412            5286 : action_to_str(file_action_t action)
     413                 : {
     414            5286 :     switch (action)
     415                 :     {
     416             383 :         case FILE_ACTION_NONE:
     417             383 :             return "NONE";
     418            4180 :         case FILE_ACTION_COPY:
     419            4180 :             return "COPY";
     420               4 :         case FILE_ACTION_TRUNCATE:
     421               4 :             return "TRUNCATE";
     422               5 :         case FILE_ACTION_COPY_TAIL:
     423               5 :             return "COPY_TAIL";
     424               8 :         case FILE_ACTION_CREATE:
     425               8 :             return "CREATE";
     426             706 :         case FILE_ACTION_REMOVE:
     427             706 :             return "REMOVE";
     428                 : 
     429 UBC           0 :         default:
     430               0 :             return "unknown";
     431                 :     }
     432                 : }
     433                 : 
     434                 : /*
     435                 :  * Calculate total_size and fetch_size of 'filemap', needed for progress reports.
     436                 :  */
     437                 : void
     438               0 : calculate_totals(filemap_t *filemap)
     439                 : {
     440                 :     file_entry_t *entry;
     441                 :     int         i;
     442                 : 
     443               0 :     filemap->total_size = 0;
     444               0 :     filemap->fetch_size = 0;
     445                 : 
     446               0 :     for (i = 0; i < filemap->nentries; i++)
     447                 :     {
     448               0 :         entry = filemap->entries[i];
     449                 :         /* only entries that are regular files in the source are counted */
     450               0 :         if (entry->source_type != FILE_TYPE_REGULAR)
     451               0 :             continue;
     452                 : 
     453               0 :         filemap->total_size += entry->source_size;
     454                 :         /* a whole-file copy fetches the full source size; skip the page map */
     455               0 :         if (entry->action == FILE_ACTION_COPY)
     456                 :         {
     457               0 :             filemap->fetch_size += entry->source_size;
     458               0 :             continue;
     459                 :         }
     460                 :         /* a tail copy fetches what the source has beyond the target size */
     461               0 :         if (entry->action == FILE_ACTION_COPY_TAIL)
     462               0 :             filemap->fetch_size += (entry->source_size - entry->target_size);
     463                 :         /* plus one BLCKSZ for every page the overwrite map iterator returns */
     464               0 :         if (entry->target_pages_to_overwrite.bitmapsize > 0)
     465                 :         {
     466                 :             datapagemap_iterator_t *iter;
     467                 :             BlockNumber blk;
     468                 : 
     469               0 :             iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
     470               0 :             while (datapagemap_next(iter, &blk))
     471               0 :                 filemap->fetch_size += BLCKSZ;
     472                 : 
     473               0 :             pg_free(iter);
     474                 :         }
     475                 :     }
     476               0 : }
     477                 : /* Log, at debug level, every entry that has an action other than NONE or pages to overwrite */
     478                 : void
     479 CBC          13 : print_filemap(filemap_t *filemap)
     480                 : {
     481                 :     file_entry_t *entry;
     482                 :     int         i;
     483                 : 
     484           15589 :     for (i = 0; i < filemap->nentries; i++)
     485                 :     {
     486           15576 :         entry = filemap->entries[i];
     487           15576 :         if (entry->action != FILE_ACTION_NONE ||
     488           10673 :             entry->target_pages_to_overwrite.bitmapsize > 0)
     489                 :         {
     490            5286 :             pg_log_debug("%s (%s)", entry->path,
     491                 :                          action_to_str(entry->action));
     492                 :             /* also list the individual pages, if any are marked */
     493            5286 :             if (entry->target_pages_to_overwrite.bitmapsize > 0)
     494             391 :                 datapagemap_print(&entry->target_pages_to_overwrite);
     495                 :         }
     496                 :     }
     497              13 :     fflush(stdout);
     498              13 : }
     499                 : 
     500                 : /*
     501                 :  * Does it look like a relation data file?
     502                 :  *
     503                 :  * For our purposes, only files belonging to the main fork are considered
     504                 :  * relation files. Other forks are always copied in toto, because we cannot
     505                 :  * reliably track changes to them, because WAL only contains block references
     506                 :  * for the main fork.
     507                 :  */
     508                 : static bool
     509           15918 : isRelDataFile(const char *path)
     510                 : {
     511                 :     RelFileLocator rlocator;
     512                 :     unsigned int segNo;
     513                 :     int         nmatch;
     514                 :     bool        matched;
     515                 : 
     516                 :     /*----
     517                 :      * Relation data files can be in one of the following directories:
     518                 :      *
     519                 :      * global/
     520                 :      *      shared relations
     521                 :      *
     522                 :      * base/<db oid>/
     523                 :      *      regular relations, default tablespace
     524                 :      *
     525                 :      * pg_tblspc/<tblspc oid>/<tblspc version>/
     526                 :      *      within a non-default tablespace (the name of the directory
     527                 :      *      depends on version)
     528                 :      *
     529                 :      * And the relation data files themselves have a filename like:
     530                 :      *
     531                 :      * <oid>.<segment number>
     532                 :      *
     533                 :      *----
     534                 :      */
     535 GNC       15918 :     rlocator.spcOid = InvalidOid;
     536           15918 :     rlocator.dbOid = InvalidOid;
     537           15918 :     rlocator.relNumber = InvalidRelFileNumber;
     538 CBC       15918 :     segNo = 0;
     539           15918 :     matched = false;
     540                 :     /* try the three directory layouts in turn; the segment number is optional */
     541 GNC       15918 :     nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
     542 CBC       15918 :     if (nmatch == 1 || nmatch == 2)
     543                 :     {
     544 GNC         780 :         rlocator.spcOid = GLOBALTABLESPACE_OID;
     545             780 :         rlocator.dbOid = 0;
     546 CBC         780 :         matched = true;
     547                 :     }
     548                 :     else
     549                 :     {
     550           15138 :         nmatch = sscanf(path, "base/%u/%u.%u",
     551                 :                         &rlocator.dbOid, &rlocator.relNumber, &segNo);
     552           15138 :         if (nmatch == 2 || nmatch == 3)
     553                 :         {
     554 GNC       13998 :             rlocator.spcOid = DEFAULTTABLESPACE_OID;
     555 CBC       13998 :             matched = true;
     556                 :         }
     557                 :         else
     558                 :         {
     559            1140 :             nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
     560                 :                             &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
     561                 :                             &segNo);
     562            1140 :             if (nmatch == 3 || nmatch == 4)
     563 UBC           0 :                 matched = true;
     564                 :         }
     565                 :     }
     566                 : 
     567                 :     /*
     568                 :      * The sscanf tests above can match files that have extra characters at
     569                 :      * the end. To eliminate such cases, cross-check that datasegpath()
     570                 :      * creates the exact same filename, when passed the RelFileLocator
     571                 :      * information we extracted from the filename.
     572                 :      */
     573 CBC       15918 :     if (matched)
     574                 :     {
     575 GNC       14778 :         char       *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo);
     576                 : 
     577 CBC       14778 :         if (strcmp(check_path, path) != 0)
     578            3620 :             matched = false;
     579                 : 
     580           14778 :         pfree(check_path);
     581                 :     }
     582                 : 
     583           15918 :     return matched;
     584                 : }
     585                 : 
     586                 : /*
     587                 :  * Build the path of one segment of a relation fork: the relpathperm() path,
     588                 :  * with ".<segno>" appended when segno is greater than zero.
     589                 :  * The returned path is palloc'd; the caller is expected to pfree it.
     590                 :  */
     591                 : static char *
     592 GNC       99560 : datasegpath(RelFileLocator rlocator, ForkNumber forknum, BlockNumber segno)
     593                 : {
     594                 :     char       *path;
     595                 :     char       *segpath;
     596                 : 
     597           99560 :     path = relpathperm(rlocator, forknum);
     598 CBC       99560 :     if (segno > 0)
     599                 :     {
     600 UBC           0 :         segpath = psprintf("%s.%u", path, segno);
     601               0 :         pfree(path);	/* base path is no longer needed */
     602               0 :         return segpath;
     603                 :     }
     604                 :     else
     605 CBC       99560 :         return path;	/* segment 0 carries no suffix */
     606                 : }
     607                 : 
     608                 : /*
     609                 :  * In the final stage, the filemap is sorted so that removals come last.
     610                 :  * From disk space usage point of view, it would be better to do removals
     611                 :  * first, but for now, safety first. If a whole directory is deleted, all
     612                 :  * files and subdirectories inside it need to be removed first. On creation,
     613                 :  * parent directory needs to be created before files and directories inside
     614                 :  * it. To achieve that, the file_action_t enum is ordered so that we can
     615                 :  * just sort on that first. Furthermore, sort REMOVE entries in reverse
     616                 :  * path order, so that "foo/bar" subdirectory is removed before "foo".
     617                 :  */
     618                 : static int
     619          167414 : final_filemap_cmp(const void *a, const void *b)
     620                 : {
     621          167414 :     file_entry_t *fa = *((file_entry_t **) a);	/* array elements are pointers */
     622          167414 :     file_entry_t *fb = *((file_entry_t **) b);
     623                 : 
     624          167414 :     if (fa->action > fb->action)	/* primary key: the action value */
     625            6756 :         return 1;
     626          160658 :     if (fa->action < fb->action)
     627            7924 :         return -1;
     628                 : 
     629          152734 :     if (fa->action == FILE_ACTION_REMOVE)
     630            5499 :         return strcmp(fb->path, fa->path);	/* swapped args: reverse path order */
     631                 :     else
     632          147235 :         return strcmp(fa->path, fb->path);
     633                 : }
     634                 : 
     635                 : /*
     636                 :  * Decide what action to perform on a file, and return it; the entry itself is not modified.
     637                 :  */
     638                 : static file_action_t
     639           15576 : decide_file_action(file_entry_t *entry)
     640                 : {
     641           15576 :     const char *path = entry->path;
     642                 : 
     643                 :     /*
     644                 :      * Don't touch the control file. It is handled specially, after copying
     645                 :      * all the other files.
     646                 :      */
     647           15576 :     if (strcmp(path, "global/pg_control") == 0)
     648              13 :         return FILE_ACTION_NONE;
     649                 : 
     650                 :     /*
     651                 :      * Remove all files matching the exclusion filters in the target.
     652                 :      */
     653           15563 :     if (check_file_excluded(path, true))
     654                 :     {
     655              92 :         if (entry->target_exists)
     656              65 :             return FILE_ACTION_REMOVE;
     657                 :         else
     658              27 :             return FILE_ACTION_NONE;	/* nothing in the target to remove */
     659                 :     }
     660                 : 
     661                 :     /*
     662                 :      * Handle cases where the file is missing from one of the systems.
     663                 :      */
     664           15471 :     if (!entry->target_exists && entry->source_exists)
     665                 :     {
     666                 :         /*
     667                 :          * File exists in source, but not in target. Copy it in toto. (If it's
     668                 :          * a relation data file, WAL replay after rewinding should re-create
     669                 :          * it anyway. But there's no harm in copying it now.)
     670                 :          */
     671             670 :         switch (entry->source_type)
     672                 :         {
     673               8 :             case FILE_TYPE_DIRECTORY:
     674                 :             case FILE_TYPE_SYMLINK:
     675               8 :                 return FILE_ACTION_CREATE;
     676             662 :             case FILE_TYPE_REGULAR:
     677             662 :                 return FILE_ACTION_COPY;
     678 UBC           0 :             case FILE_TYPE_UNDEFINED:
     679               0 :                 pg_fatal("unknown file type for \"%s\"", entry->path);
     680                 :                 break;
     681                 :         }
     682                 :     }
     683 CBC       14801 :     else if (entry->target_exists && !entry->source_exists)
     684                 :     {
     685                 :         /* File exists in target, but not source. Remove it. */
     686             641 :         return FILE_ACTION_REMOVE;
     687                 :     }
     688           14160 :     else if (!entry->target_exists && !entry->source_exists)
     689                 :     {
     690                 :         /*
     691                 :          * Doesn't exist in either server. Why does it have an entry in the
     692                 :          * first place??
     693                 :          */
     694 UBC           0 :         Assert(false);
     695                 :         return FILE_ACTION_NONE;
     696                 :     }
     697                 : 
     698                 :     /*
     699                 :      * Otherwise, the file exists on both systems
     700                 :      */
     701 CBC       14160 :     Assert(entry->target_exists && entry->source_exists);
     702                 : 
     703           14160 :     if (entry->source_type != entry->target_type)
     704                 :     {
     705                 :         /* But it's a different kind of object. Strange.. */
     706 UBC           0 :         pg_fatal("file \"%s\" is of different type in source and target", entry->path);
     707                 :     }
     708                 : 
     709                 :     /*
     710                 :      * PG_VERSION files should be identical on both systems, but avoid
     711                 :      * overwriting them for paranoia.
     712                 :      */
     713 CBC       14160 :     if (pg_str_endswith(entry->path, "PG_VERSION"))
     714              56 :         return FILE_ACTION_NONE;
     715                 : 
     716           14104 :     switch (entry->source_type)	/* equals target_type, checked above */
     717                 :     {
     718             332 :         case FILE_TYPE_DIRECTORY:
     719             332 :             return FILE_ACTION_NONE;
     720                 : 
     721 UBC           0 :         case FILE_TYPE_SYMLINK:
     722                 : 
     723                 :             /*
     724                 :              * XXX: Should we check if it points to the same target?
     725                 :              */
     726               0 :             return FILE_ACTION_NONE;
     727                 : 
     728 CBC       13772 :         case FILE_TYPE_REGULAR:
     729           13772 :             if (!entry->isrelfile)
     730                 :             {
     731                 :                 /*
     732                 :                  * It's a non-data file that we have no special processing
     733                 :                  * for. Copy it in toto.
     734                 :                  */
     735            3518 :                 return FILE_ACTION_COPY;
     736                 :             }
     737                 :             else
     738                 :             {
     739                 :                 /*
     740                 :                  * It's a data file that exists in both systems.
     741                 :                  *
     742                 :                  * If it's larger in target, we can truncate it. There will
     743                 :                  * also be a WAL record of the truncation in the source
     744                 :                  * system, so WAL replay would eventually truncate the target
     745                 :                  * too, but we might as well do it now.
     746                 :                  *
     747                 :                  * If it's smaller in the target, it means that it has been
     748                 :                  * truncated in the target, or enlarged in the source, or
     749                 :                  * both. If it was truncated in the target, we need to copy
     750                 :                  * the missing tail from the source system. If it was enlarged
     751                 :                  * in the source system, there will be WAL records in the
     752                 :                  * source system for the new blocks, so we wouldn't need to
     753                 :                  * copy them here. But we don't know which scenario we're
     754                 :                  * dealing with, and there's no harm in copying the missing
     755                 :                  * blocks now, so do it now.
     756                 :                  *
     757                 :                  * If it's the same size, do nothing here. Any blocks modified
     758                 :                  * in the target will be copied based on parsing the target
     759                 :                  * system's WAL, and any blocks modified in the source will be
     760                 :                  * updated after rewinding, when the source system's WAL is
     761                 :                  * replayed.
     762                 :                  */
     763           10254 :                 if (entry->target_size < entry->source_size)
     764               5 :                     return FILE_ACTION_COPY_TAIL;
     765           10249 :                 else if (entry->target_size > entry->source_size)
     766               4 :                     return FILE_ACTION_TRUNCATE;
     767                 :                 else
     768           10245 :                     return FILE_ACTION_NONE;
     769                 :             }
     770                 :             break;	/* not reached: both branches above return */
     771                 : 
     772 UBC           0 :         case FILE_TYPE_UNDEFINED:
     773               0 :             pg_fatal("unknown file type for \"%s\"", path);
     774                 :             break;
     775                 :     }
     776                 : 
     777                 :     /* unreachable */
     778               0 :     pg_fatal("could not decide what to do with file \"%s\"", path);
     779                 : }
     780                 : 
     781                 : /*
     782                 :  * Decide what to do with each file in the global 'filehash' table.
     783                 :  *
     784                 :  * Returns a pg_malloc'd 'filemap' with the entries in the order that their
     785                 :  * actions should be executed.
     786                 :  */
     787                 : filemap_t *
     788 CBC          13 : decide_file_actions(void)
     789                 : {
     790                 :     int         i;
     791                 :     filehash_iterator it;
     792                 :     file_entry_t *entry;
     793                 :     filemap_t  *filemap;
     794                 : 
     795              13 :     filehash_start_iterate(filehash, &it);	/* first pass: assign an action to every entry */
     796           15589 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     797                 :     {
     798           15576 :         entry->action = decide_file_action(entry);
     799                 :     }
     800                 : 
     801                 :     /*
     802                 :      * Turn the hash table into an array, and sort in the order that the
     803                 :      * actions should be performed.
     804                 :      */
     805              13 :     filemap = pg_malloc(offsetof(filemap_t, entries) +
     806              13 :                         filehash->members * sizeof(file_entry_t *));	/* header + one pointer per entry */
     807              13 :     filemap->nentries = filehash->members;
     808              13 :     filehash_start_iterate(filehash, &it);	/* second pass: collect entry pointers */
     809              13 :     i = 0;
     810           15589 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
     811                 :     {
     812           15576 :         filemap->entries[i++] = entry;
     813                 :     }
     814                 : 
     815              13 :     qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
     816                 :           final_filemap_cmp);	/* NOTE(review): &entries assumes an array member, not a pointer -- confirm */
     817                 : 
     818              13 :     return filemap;
     819                 : }
     820                 : 
     821                 : 
     822                 : /*
     823                 :  * Helper function for filemap hash table: hash the strlen(s) bytes of string s with hash_bytes().
     824                 :  */
     825                 : static uint32
     826          133990 : hash_string_pointer(const char *s)
     827                 : {
     828          133990 :     unsigned char *ss = (unsigned char *) s;	/* cast to the unsigned char pointer passed to hash_bytes() */
     829                 : 
     830          133990 :     return hash_bytes(ss, strlen(s));	/* terminating NUL is not hashed */
     831                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a