LCOV - differential code coverage report
Current view: top level - src/bin/pg_rewind - filemap.c (source / functions) Coverage Total Hit UBC GNC CBC DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 82.2 % 230 189 41 12 177 12
Current Date: 2023-04-08 17:13:01 Functions: 93.8 % 16 15 1 3 12 2
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (180,240] days: 100.0 % 3 3 1 2
Legend: Lines: hit not hit (240..) days: 81.9 % 227 186 41 11 175
Function coverage date bins:
(240..) days: 93.8 % 16 15 1 3 12

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * filemap.c
                                  4                 :  *    A data structure for keeping track of files that have changed.
                                  5                 :  *
                                  6                 :  * This source file contains the logic to decide what to do with different
                                  7                 :  * kinds of files, and the data structure to support it.  Before modifying
                                  8                 :  * anything, pg_rewind collects information about all the files and their
                                  9                 :  * attributes in the target and source data directories.  It also scans the
                                 10                 :  * WAL in the target, and collects information about data blocks that
                                 11                 :  * were changed.  All this information is stored in a hash table, using the
                                 12                 :  * file path relative to the root of the data directory as the key.
                                 13                 :  *
                                 14                 :  * After collecting all the information required, the decide_file_actions()
                                 15                 :  * function scans the hash table and decides what action needs to be taken
                                 16                 :  * for each file.  Finally, it sorts the array to the final order that the
                                 17                 :  * actions should be executed in.
                                 18                 :  *
                                 19                 :  * Copyright (c) 2013-2023, PostgreSQL Global Development Group
                                 20                 :  *
                                 21                 :  *-------------------------------------------------------------------------
                                 22                 :  */
                                 23                 : 
                                 24                 : #include "postgres_fe.h"
                                 25                 : 
                                 26                 : #include <sys/stat.h>
                                 27                 : #include <unistd.h>
                                 28                 : 
                                 29                 : #include "catalog/pg_tablespace_d.h"
                                 30                 : #include "common/hashfn.h"
                                 31                 : #include "common/string.h"
                                 32                 : #include "datapagemap.h"
                                 33                 : #include "filemap.h"
                                 34                 : #include "pg_rewind.h"
                                 35                 : #include "storage/fd.h"
                                 36                 : 
                                 37                 : /*
                                 38                 :  * Define a hash table which we can use to store information about the files
                                 39                 :  * appearing in source and target systems.
                                 40                 :  */
                                 41                 : static uint32 hash_string_pointer(const char *s);
                                 42                 : #define SH_PREFIX       filehash
                                 43                 : #define SH_ELEMENT_TYPE file_entry_t
                                 44                 : #define SH_KEY_TYPE     const char *
                                 45                 : #define SH_KEY          path
                                 46                 : #define SH_HASH_KEY(tb, key)    hash_string_pointer(key)
                                 47                 : #define SH_EQUAL(tb, a, b)      (strcmp(a, b) == 0)
                                 48                 : #define SH_SCOPE        static inline
                                 49                 : #define SH_RAW_ALLOCATOR    pg_malloc0
                                 50                 : #define SH_DECLARE
                                 51                 : #define SH_DEFINE
                                 52                 : #include "lib/simplehash.h"
                                 53                 : 
                                 54                 : #define FILEHASH_INITIAL_SIZE   1000
                                 55                 : 
                                 56                 : static filehash_hash *filehash;
                                 57                 : 
                                 58                 : static bool isRelDataFile(const char *path);
                                 59                 : static char *datasegpath(RelFileLocator rlocator, ForkNumber forknum,
                                 60                 :                          BlockNumber segno);
                                 61                 : 
                                 62                 : static file_entry_t *insert_filehash_entry(const char *path);
                                 63                 : static file_entry_t *lookup_filehash_entry(const char *path);
                                 64                 : static int  final_filemap_cmp(const void *a, const void *b);
                                 65                 : static bool check_file_excluded(const char *path, bool is_source);
                                 66                 : 
                                 67                 : /*
                                 68                 :  * Definition of one element of an exclusion list, used to exclude
                                 69                 :  * contents when rewinding.  "name" is the name of the file or path to
                                 70                 :  * check for exclusion.  If "match_prefix" is true, any item whose name
                                 71                 :  * starts with "name" is excluded.
                                 72                 :  */
                                 73                 : struct exclude_list_item
                                 74                 : {
                                 75                 :     const char *name;
                                 76                 :     bool        match_prefix;
                                 77                 : };
                                 78                 : 
                                 79                 : /*
                                 80                 :  * The contents of these directories are removed or recreated during server
                                 81                 :  * start so they are not included in data processed by pg_rewind.
                                 82                 :  *
                                 83                 :  * Note: those lists should be kept in sync with what basebackup.c provides.
                                 84                 :  * Some of the values, contrary to what basebackup.c uses, are hardcoded as
                                 85                 :  * they are defined in backend-only headers.  So this list is maintained
                                 86                 :  * with a best effort in mind.
                                 87                 :  */
                                 88                 : static const char *excludeDirContents[] =
                                 89                 : {
                                 90                 :     /*
                                 91                 :      * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped
                                 92                 :      * because extensions like pg_stat_statements store data there.
                                 93                 :      */
                                 94                 :     "pg_stat_tmp",                /* defined as PG_STAT_TMP_DIR */
                                 95                 : 
                                 96                 :     /*
                                 97                 :      * It is generally not useful to back up the contents of this directory
                                 98                 :      * even if the intention is to restore to another primary. See backup.sgml
                                 99                 :      * for a more detailed description.
                                100                 :      */
                                101                 :     "pg_replslot",
                                102                 : 
                                103                 :     /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
                                104                 :     "pg_dynshmem",                /* defined as PG_DYNSHMEM_DIR */
                                105                 : 
                                106                 :     /* Contents removed on startup, see AsyncShmemInit(). */
                                107                 :     "pg_notify",
                                108                 : 
                                109                 :     /*
                                110                 :      * Old contents are loaded for possible debugging but are not required for
                                111                 :      * normal operation, see SerialInit().
                                112                 :      */
                                113                 :     "pg_serial",
                                114                 : 
                                115                 :     /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
                                116                 :     "pg_snapshots",
                                117                 : 
                                118                 :     /* Contents zeroed on startup, see StartupSUBTRANS(). */
                                119                 :     "pg_subtrans",
                                120                 : 
                                121                 :     /* end of list */
                                122                 :     NULL
                                123                 : };
                                124                 : 
                                125                 : /*
                                126                 :  * List of files excluded from filemap processing.  A file is excluded if
                                127                 :  * its name matches exactly, or by prefix when "match_prefix" is true.
                                128                 :  */
                                129                 : static const struct exclude_list_item excludeFiles[] =
                                130                 : {
                                131                 :     /* Skip auto conf temporary file. */
                                132                 :     {"postgresql.auto.conf.tmp", false},  /* defined as PG_AUTOCONF_FILENAME */
                                133                 : 
                                134                 :     /* Skip current log file temporary file */
                                135                 :     {"current_logfiles.tmp", false},  /* defined as
                                136                 :                                          * LOG_METAINFO_DATAFILE_TMP */
                                137                 : 
                                138                 :     /* Skip relation cache because it is rebuilt on startup */
                                139                 :     {"pg_internal.init", true}, /* defined as RELCACHE_INIT_FILENAME */
                                140                 : 
                                141                 :     /*
                                142                 :      * If there is a backup_label or tablespace_map file, it indicates that a
                                143                 :      * recovery failed and this cluster probably can't be rewound, but exclude
                                144                 :      * them anyway if they are found.
                                145                 :      */
                                146                 :     {"backup_label", false},  /* defined as BACKUP_LABEL_FILE */
                                147                 :     {"tablespace_map", false},    /* defined as TABLESPACE_MAP */
                                148                 : 
                                149                 :     /*
                                150                 :      * If there's a backup_manifest, it belongs to a backup that was used to
                                151                 :      * start this server. It is *not* correct for this backup. Our
                                152                 :      * backup_manifest is injected into the backup separately if users want
                                153                 :      * it.
                                154                 :      */
                                155                 :     {"backup_manifest", false},
                                156                 : 
                                157                 :     {"postmaster.pid", false},
                                158                 :     {"postmaster.opts", false},
                                159                 : 
                                160                 :     /* end of list */
                                161                 :     {NULL, false}
                                162                 : };
                                163                 : 
                                164                 : /*
                                165                 :  * Initialize the hash table for the file map.
                                166                 :  */
                                167                 : void
  886 heikki.linnakangas        168 CBC          13 : filehash_init(void)
                                169                 : {
                                170              13 :     filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
 2939                           171              13 : }
                                172                 : 
                                173                 : /* Look up entry for 'path', creating a new one if it doesn't exist */
                                174                 : static file_entry_t *
  886                           175           29791 : insert_filehash_entry(const char *path)
                                176                 : {
                                177                 :     file_entry_t *entry;
                                178                 :     bool        found;
                                179                 : 
                                180           29791 :     entry = filehash_insert(filehash, path, &found);
                                181           29791 :     if (!found)
                                182                 :     {
                                183           15576 :         entry->path = pg_strdup(path);
                                184           15576 :         entry->isrelfile = isRelDataFile(path);
                                185                 : 
                                186           15576 :         entry->target_exists = false;
                                187           15576 :         entry->target_type = FILE_TYPE_UNDEFINED;
                                188           15576 :         entry->target_size = 0;
                                189           15576 :         entry->target_link_target = NULL;
                                190           15576 :         entry->target_pages_to_overwrite.bitmap = NULL;
                                191           15576 :         entry->target_pages_to_overwrite.bitmapsize = 0;
                                192                 : 
                                193           15576 :         entry->source_exists = false;
                                194           15576 :         entry->source_type = FILE_TYPE_UNDEFINED;
                                195           15576 :         entry->source_size = 0;
                                196           15576 :         entry->source_link_target = NULL;
                                197                 : 
                                198           15576 :         entry->action = FILE_ACTION_UNDECIDED;
                                199                 :     }
                                200                 : 
                                201           29791 :     return entry;
                                202                 : }
                                203                 : 
                                204                 : static file_entry_t *
                                205           84782 : lookup_filehash_entry(const char *path)
                                206                 : {
                                207           84782 :     return filehash_lookup(filehash, path);
                                208                 : }
                                209                 : 
                                210                 : /*
                                211                 :  * Callback for processing source file list.
                                212                 :  *
                                213                 :  * This is called once for every file in the source server.  We record the
                                214                 :  * type and size of the file, so that decide_file_action() can later decide what
                                215                 :  * to do with it.
                                216                 :  */
                                217                 : void
                                218           14912 : process_source_file(const char *path, file_type_t type, size_t size,
                                219                 :                     const char *link_target)
                                220                 : {
                                221                 :     file_entry_t *entry;
                                222                 : 
                                223                 :     /*
                                224                 :      * Pretend that pg_wal is a directory, even if it's really a symlink. We
                                225                 :      * don't want to mess with the symlink itself, nor complain if it's a
                                226                 :      * symlink in source but not in target or vice versa.
                                227                 :      */
 2362 rhaas                     228           14912 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
 2806 heikki.linnakangas        229 UBC           0 :         type = FILE_TYPE_DIRECTORY;
                                230                 : 
                                231                 :     /*
                                232                 :      * sanity check: a filename that looks like a data file better be a
                                233                 :      * regular file
                                234                 :      */
 2939 heikki.linnakangas        235 CBC       14912 :     if (type != FILE_TYPE_REGULAR && isRelDataFile(path))
 1469 peter                     236 UBC           0 :         pg_fatal("data file \"%s\" in source is not a regular file", path);
                                237                 : 
                                238                 :     /* Remember this source file */
  886 heikki.linnakangas        239 CBC       14912 :     entry = insert_filehash_entry(path);
                                240           14912 :     if (entry->source_exists)
  886 heikki.linnakangas        241 UBC           0 :         pg_fatal("duplicate source file \"%s\"", path);
  886 heikki.linnakangas        242 CBC       14912 :     entry->source_exists = true;
                                243           14912 :     entry->source_type = type;
                                244           14912 :     entry->source_size = size;
                                245           14912 :     entry->source_link_target = link_target ? pg_strdup(link_target) : NULL;
 2939                           246           14912 : }
                                247                 : 
                                248                 : /*
                                249                 :  * Callback for processing target file list.
                                250                 :  *
                                251                 :  * Record the type and size of the file, like process_source_file() does.
                                252                 :  */
                                253                 : void
  886                           254           14879 : process_target_file(const char *path, file_type_t type, size_t size,
                                255                 :                     const char *link_target)
                                256                 : {
                                257                 :     file_entry_t *entry;
                                258                 : 
                                259                 :     /*
                                260                 :      * Do not apply any exclusion filters here.  This has the advantage of
                                261                 :      * removing from the target data folder all paths which have been filtered
                                262                 :      * out from the source data folder when processing the source files.
                                263                 :      */
                                264                 : 
                                265                 :     /*
                                266                 :      * Like in process_source_file, pretend that pg_wal is always a directory.
                                267                 :      */
 2362 rhaas                     268           14879 :     if (strcmp(path, "pg_wal") == 0 && type == FILE_TYPE_SYMLINK)
 2806 heikki.linnakangas        269               2 :         type = FILE_TYPE_DIRECTORY;
                                270                 : 
                                271                 :     /* Remember this target file */
  886                           272           14879 :     entry = insert_filehash_entry(path);
                                273           14879 :     if (entry->target_exists)
  886 heikki.linnakangas        274 UBC           0 :         pg_fatal("duplicate source file \"%s\"", path);
  886 heikki.linnakangas        275 CBC       14879 :     entry->target_exists = true;
                                276           14879 :     entry->target_type = type;
                                277           14879 :     entry->target_size = size;
                                278           14879 :     entry->target_link_target = link_target ? pg_strdup(link_target) : NULL;
 2939                           279           14879 : }
                                280                 : 
                                281                 : /*
                                282                 :  * This callback gets called while we read the WAL in the target, for every
                                283                 :  * block that has changed in the target system.  It decides if the given
                                284                 :  * 'blkno' in the target relfile needs to be overwritten from the source, and
                                285                 :  * if so, records it in 'target_pages_to_overwrite' bitmap.
                                286                 :  *
                                287                 :  * NOTE: All the files on both systems must have already been added to the
                                288                 :  * hash table!
                                289                 :  */
                                290                 : void
  277 rhaas                     291 GNC       84782 : process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
                                292                 :                                 BlockNumber blkno)
                                293                 : {
                                294                 :     char       *path;
                                295                 :     file_entry_t *entry;
                                296                 :     BlockNumber blkno_inseg;
                                297                 :     int         segno;
                                298                 : 
 2939 heikki.linnakangas        299 CBC       84782 :     segno = blkno / RELSEG_SIZE;
                                300           84782 :     blkno_inseg = blkno % RELSEG_SIZE;
                                301                 : 
  277 rhaas                     302 GNC       84782 :     path = datasegpath(rlocator, forknum, segno);
  886 heikki.linnakangas        303 CBC       84782 :     entry = lookup_filehash_entry(path);
 2859 fujii                     304           84782 :     pfree(path);
                                305                 : 
                                306                 :     /*
                                307                 :      * If the block still exists in both systems, remember it. Otherwise we
                                308                 :      * can safely ignore it.
                                309                 :      *
                                310                 :      * If the block is beyond the EOF in the source system, or the file
                                311                 :      * doesn't exist in the source at all, we're going to truncate or remove
                                312                 :      * it from the target anyway. Likewise, if it doesn't exist in the
                                313                 :      * target anymore, we will copy it over with the "tail" from the source
                                314                 :      * system, anyway.
                                315                 :      *
                                316                 :      * It is possible to find WAL for a file that doesn't exist on either
                                317                 :      * system anymore. It means that the relation was dropped later in the
                                318                 :      * target system, and independently on the source system too, or that it
                                319                 :      * was created and dropped in the target system and it never existed in
                                320                 :      * the source. Either way, we can safely ignore it.
                                321                 :      */
 2939 heikki.linnakangas        322           84782 :     if (entry)
                                323                 :     {
  886                           324           84782 :         Assert(entry->isrelfile);
                                325                 : 
  880                           326           84782 :         if (entry->target_exists)
                                327                 :         {
                                328           84778 :             if (entry->target_type != FILE_TYPE_REGULAR)
  880 heikki.linnakangas        329 UBC           0 :                 pg_fatal("unexpected page modification for non-regular file \"%s\"",
                                330                 :                          entry->path);
                                331                 : 
  880 heikki.linnakangas        332 CBC       84778 :             if (entry->source_exists)
                                333                 :             {
                                334                 :                 off_t       end_offset;
                                335                 : 
                                336           83132 :                 end_offset = (blkno_inseg + 1) * BLCKSZ;
                                337           83132 :                 if (end_offset <= entry->source_size && end_offset <= entry->target_size)
                                338            2800 :                     datapagemap_add(&entry->target_pages_to_overwrite, blkno_inseg);
                                339                 :             }
                                340                 :         }
                                341                 :     }
 2939                           342           84782 : }
                                343                 : 
                                344                 : /*
                                345                 :  * Is this the path of a file that pg_rewind can skip copying?
                                346                 :  */
                                347                 : static bool
 1669 peter_e                   348           15563 : check_file_excluded(const char *path, bool is_source)
                                349                 : {
                                350                 :     char        localpath[MAXPGPATH];
                                351                 :     int         excludeIdx;
                                352                 :     const char *filename;
                                353                 : 
                                354                 :     /*
                                355                 :      * Skip all temporary files, .../pgsql_tmp/... and .../pgsql_tmp.*
                                356                 :      */
  886 heikki.linnakangas        357           15563 :     if (strstr(path, "/" PG_TEMP_FILE_PREFIX) != NULL ||
                                358           15550 :         strstr(path, "/" PG_TEMP_FILES_DIR "/") != NULL)
                                359                 :     {
                                360              13 :         return true;
                                361                 :     }
                                362                 : 
                                363                 :     /* check individual files... */
 1140 michael                   364          139682 :     for (excludeIdx = 0; excludeFiles[excludeIdx].name != NULL; excludeIdx++)
                                365                 :     {
                                366          124198 :         int         cmplen = strlen(excludeFiles[excludeIdx].name);
                                367                 : 
 1837 fujii                     368          124198 :         filename = last_dir_separator(path);
                                369          124198 :         if (filename == NULL)
                                370            2696 :             filename = path;
                                371                 :         else
                                372          121502 :             filename++;
                                373                 : 
 1140 michael                   374          124198 :         if (!excludeFiles[excludeIdx].match_prefix)
                                375          108648 :             cmplen++;
                                376          124198 :         if (strncmp(filename, excludeFiles[excludeIdx].name, cmplen) == 0)
                                377                 :         {
 1669 peter_e                   378              66 :             if (is_source)
 1469 peter                     379              66 :                 pg_log_debug("entry \"%s\" excluded from source file list",
                                380                 :                              path);
                                381                 :             else
 1469 peter                     382 UBC           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
                                383                 :                              path);
 1837 fujii                     384 CBC          66 :             return true;
                                385                 :         }
                                386                 :     }
                                387                 : 
                                388                 :     /*
                                389                 :      * ... And check some directories.  Note that this includes any contents
                                390                 :      * within the directories themselves.
                                391                 :      */
                                392          123859 :     for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
                                393                 :     {
                                394          108388 :         snprintf(localpath, sizeof(localpath), "%s/",
                                395                 :                  excludeDirContents[excludeIdx]);
                                396          108388 :         if (strstr(path, localpath) == path)
                                397                 :         {
 1669 peter_e                   398              13 :             if (is_source)
 1469 peter                     399              13 :                 pg_log_debug("entry \"%s\" excluded from source file list",
                                400                 :                              path);
                                401                 :             else
 1469 peter                     402 UBC           0 :                 pg_log_debug("entry \"%s\" excluded from target file list",
                                403                 :                              path);
 1837 fujii                     404 CBC          13 :             return true;
                                405                 :         }
                                406                 :     }
                                407                 : 
                                408           15471 :     return false;
                                409                 : }
                                410                 : 
                                411                 : static const char *
 2939 heikki.linnakangas        412            5286 : action_to_str(file_action_t action)
                                413                 : {
                                414            5286 :     switch (action)
                                415                 :     {
                                416             383 :         case FILE_ACTION_NONE:
                                417             383 :             return "NONE";
                                418            4180 :         case FILE_ACTION_COPY:
                                419            4180 :             return "COPY";
                                420               4 :         case FILE_ACTION_TRUNCATE:
                                421               4 :             return "TRUNCATE";
                                422               5 :         case FILE_ACTION_COPY_TAIL:
                                423               5 :             return "COPY_TAIL";
                                424               8 :         case FILE_ACTION_CREATE:
                                425               8 :             return "CREATE";
                                426             706 :         case FILE_ACTION_REMOVE:
                                427             706 :             return "REMOVE";
                                428                 : 
 2939 heikki.linnakangas        429 UBC           0 :         default:
                                430               0 :             return "unknown";
                                431                 :     }
                                432                 : }
                                433                 : 
                                434                 : /*
                                435                 :  * Calculate the totals needed for progress reports.
                                436                 :  */
                                437                 : void
  886                           438               0 : calculate_totals(filemap_t *filemap)
                                439                 : {
                                440                 :     file_entry_t *entry;
                                441                 :     int         i;
                                442                 : 
                                443               0 :     filemap->total_size = 0;
                                444               0 :     filemap->fetch_size = 0;
                                445                 : 
                                446               0 :     for (i = 0; i < filemap->nentries; i++)
                                447                 :     {
                                448               0 :         entry = filemap->entries[i];
                                449                 : 
                                450               0 :         if (entry->source_type != FILE_TYPE_REGULAR)
 2939                           451               0 :             continue;
                                452                 : 
  886                           453               0 :         filemap->total_size += entry->source_size;
                                454                 : 
 2939                           455               0 :         if (entry->action == FILE_ACTION_COPY)
                                456                 :         {
  886                           457               0 :             filemap->fetch_size += entry->source_size;
 2939                           458               0 :             continue;
                                459                 :         }
                                460                 : 
                                461               0 :         if (entry->action == FILE_ACTION_COPY_TAIL)
  886                           462               0 :             filemap->fetch_size += (entry->source_size - entry->target_size);
                                463                 : 
                                464               0 :         if (entry->target_pages_to_overwrite.bitmapsize > 0)
                                465                 :         {
                                466                 :             datapagemap_iterator_t *iter;
                                467                 :             BlockNumber blk;
                                468                 : 
                                469               0 :             iter = datapagemap_iterate(&entry->target_pages_to_overwrite);
 2939                           470               0 :             while (datapagemap_next(iter, &blk))
  886                           471               0 :                 filemap->fetch_size += BLCKSZ;
                                472                 : 
 2939                           473               0 :             pg_free(iter);
                                474                 :         }
                                475                 :     }
                                476               0 : }
                                477                 : 
                                478                 : void
  886 heikki.linnakangas        479 CBC          13 : print_filemap(filemap_t *filemap)
                                480                 : {
                                481                 :     file_entry_t *entry;
                                482                 :     int         i;
                                483                 : 
                                484           15589 :     for (i = 0; i < filemap->nentries; i++)
                                485                 :     {
                                486           15576 :         entry = filemap->entries[i];
 2939                           487           15576 :         if (entry->action != FILE_ACTION_NONE ||
  886                           488           10673 :             entry->target_pages_to_overwrite.bitmapsize > 0)
                                489                 :         {
 1469 peter                     490            5286 :             pg_log_debug("%s (%s)", entry->path,
                                491                 :                          action_to_str(entry->action));
                                492                 : 
  886 heikki.linnakangas        493            5286 :             if (entry->target_pages_to_overwrite.bitmapsize > 0)
                                494             391 :                 datapagemap_print(&entry->target_pages_to_overwrite);
                                495                 :         }
                                496                 :     }
 2939                           497              13 :     fflush(stdout);
                                498              13 : }
                                499                 : 
                                500                 : /*
                                501                 :  * Does it look like a relation data file?
                                502                 :  *
                                503                 :  * For our purposes, only files belonging to the main fork are considered
                                504                 :  * relation files. Other forks are always copied in toto, because we cannot
                                505                 :  * relation files. Other forks are always copied in toto: we cannot
                                506                 :  * for the main fork.
                                507                 :  */
                                508                 : static bool
                                509           15918 : isRelDataFile(const char *path)
                                510                 : {
                                511                 :     RelFileLocator rlocator;
                                512                 :     unsigned int segNo;
                                513                 :     int         nmatch;
                                514                 :     bool        matched;
                                515                 : 
                                516                 :     /*----
                                517                 :      * Relation data files can be in one of the following directories:
                                518                 :      *
                                519                 :      * global/
                                520                 :      *      shared relations
                                521                 :      *
                                522                 :      * base/<db oid>/
                                523                 :      *      regular relations, default tablespace
                                524                 :      *
                                525                 :      * pg_tblspc/<tblspc oid>/<tblspc version>/
                                526                 :      *      within a non-default tablespace (the name of the directory
                                527                 :      *      depends on version)
                                528                 :      *
                                529                 :      * And the relation data files themselves have a filename like:
                                530                 :      *
                                531                 :      * <oid>.<segment number>
                                532                 :      *
                                533                 :      *----
                                534                 :      */
  277 rhaas                     535 GNC       15918 :     rlocator.spcOid = InvalidOid;
                                536           15918 :     rlocator.dbOid = InvalidOid;
                                537           15918 :     rlocator.relNumber = InvalidRelFileNumber;
 2939 heikki.linnakangas        538 CBC       15918 :     segNo = 0;
                                539           15918 :     matched = false;
                                540                 : 
  193 rhaas                     541 GNC       15918 :     nmatch = sscanf(path, "global/%u.%u", &rlocator.relNumber, &segNo);
 2939 heikki.linnakangas        542 CBC       15918 :     if (nmatch == 1 || nmatch == 2)
                                543                 :     {
  277 rhaas                     544 GNC         780 :         rlocator.spcOid = GLOBALTABLESPACE_OID;
                                545             780 :         rlocator.dbOid = 0;
 2939 heikki.linnakangas        546 CBC         780 :         matched = true;
                                547                 :     }
                                548                 :     else
                                549                 :     {
  193 rhaas                     550           15138 :         nmatch = sscanf(path, "base/%u/%u.%u",
                                551                 :                         &rlocator.dbOid, &rlocator.relNumber, &segNo);
 2939 heikki.linnakangas        552           15138 :         if (nmatch == 2 || nmatch == 3)
                                553                 :         {
  277 rhaas                     554 GNC       13998 :             rlocator.spcOid = DEFAULTTABLESPACE_OID;
 2939 heikki.linnakangas        555 CBC       13998 :             matched = true;
                                556                 :         }
                                557                 :         else
                                558                 :         {
  193 rhaas                     559            1140 :             nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u",
                                560                 :                             &rlocator.spcOid, &rlocator.dbOid, &rlocator.relNumber,
                                561                 :                             &segNo);
 1860 fujii                     562            1140 :             if (nmatch == 3 || nmatch == 4)
 2939 heikki.linnakangas        563 UBC           0 :                 matched = true;
                                564                 :         }
                                565                 :     }
                                566                 : 
                                567                 :     /*
                                568                 :      * The sscanf tests above can match files that have extra characters at
                                569                 :      * the end. To eliminate such cases, cross-check that datasegpath()
                                570                 :      * creates the exact same filename, when passed the RelFileLocator
                                571                 :      * information we extracted from the filename.
                                572                 :      */
 2939 heikki.linnakangas        573 CBC       15918 :     if (matched)
                                574                 :     {
  277 rhaas                     575 GNC       14778 :         char       *check_path = datasegpath(rlocator, MAIN_FORKNUM, segNo);
                                576                 : 
 2939 heikki.linnakangas        577 CBC       14778 :         if (strcmp(check_path, path) != 0)
                                578            3620 :             matched = false;
                                579                 : 
                                580           14778 :         pfree(check_path);
                                581                 :     }
                                582                 : 
                                583           15918 :     return matched;
                                584                 : }
                                585                 : 
                                586                 : /*
                                587                 :  * A helper function to create the path of a relation file and segment.
                                588                 :  *
                                589                 :  * The returned path is palloc'd; the caller is responsible for freeing it.
                                590                 :  */
                                591                 : static char *
  277 rhaas                     592 GNC       99560 : datasegpath(RelFileLocator rlocator, ForkNumber forknum, BlockNumber segno)
                                593                 : {
                                594                 :     char       *path;
                                595                 :     char       *segpath;
                                596                 : 
                                597           99560 :     path = relpathperm(rlocator, forknum);
 2939 heikki.linnakangas        598 CBC       99560 :     if (segno > 0)
                                599                 :     {
 2939 heikki.linnakangas        600 UBC           0 :         segpath = psprintf("%s.%u", path, segno);
                                601               0 :         pfree(path);
                                602               0 :         return segpath;
                                603                 :     }
                                604                 :     else
 2939 heikki.linnakangas        605 CBC       99560 :         return path;
                                606                 : }
                                607                 : 
                                608                 : /*
                                609                 :  * In the final stage, the filemap is sorted so that removals come last.
                                610                 :  * From disk space usage point of view, it would be better to do removals
                                611                 :  * first, but for now, safety first. If a whole directory is deleted, all
                                612                 :  * files and subdirectories inside it need to be removed first. On creation,
                                613                 :  * parent directory needs to be created before files and directories inside
                                614                 :  * it. To achieve that, the file_action_t enum is ordered so that we can
                                615                 :  * just sort on that first. Furthermore, sort REMOVE entries in reverse
                                616                 :  * path order, so that "foo/bar" subdirectory is removed before "foo".
                                617                 :  */
                                618                 : static int
                                619          167414 : final_filemap_cmp(const void *a, const void *b)
                                620                 : {
                                621          167414 :     file_entry_t *fa = *((file_entry_t **) a);
                                622          167414 :     file_entry_t *fb = *((file_entry_t **) b);
                                623                 : 
                                624          167414 :     if (fa->action > fb->action)
                                625            6756 :         return 1;
                                626          160658 :     if (fa->action < fb->action)
                                627            7924 :         return -1;
                                628                 : 
                                629          152734 :     if (fa->action == FILE_ACTION_REMOVE)
 1647 tgl                       630            5499 :         return strcmp(fb->path, fa->path);
                                631                 :     else
 2939 heikki.linnakangas        632          147235 :         return strcmp(fa->path, fb->path);
                                633                 : }
                                634                 : 
                                635                 : /*
                                636                 :  * Decide what action to perform on a file.
                                637                 :  */
                                638                 : static file_action_t
  886                           639           15576 : decide_file_action(file_entry_t *entry)
                                640                 : {
                                641           15576 :     const char *path = entry->path;
                                642                 : 
                                643                 :     /*
                                644                 :      * Don't touch the control file. It is handled specially, after copying
                                645                 :      * all the other files.
                                646                 :      */
                                647           15576 :     if (strcmp(path, "global/pg_control") == 0)
                                648              13 :         return FILE_ACTION_NONE;
                                649                 : 
                                650                 :     /*
                                651                 :      * Remove all files matching the exclusion filters in the target.
                                652                 :      */
                                653           15563 :     if (check_file_excluded(path, true))
                                654                 :     {
                                655              92 :         if (entry->target_exists)
                                656              65 :             return FILE_ACTION_REMOVE;
                                657                 :         else
                                658              27 :             return FILE_ACTION_NONE;
                                659                 :     }
                                660                 : 
                                661                 :     /*
                                662                 :      * Handle cases where the file is missing from one of the systems.
                                663                 :      */
                                664           15471 :     if (!entry->target_exists && entry->source_exists)
                                665                 :     {
                                666                 :         /*
                                667                 :          * File exists in source, but not in target. Copy it in toto. (If it's
                                668                 :          * a relation data file, WAL replay after rewinding should re-create
                                669                 :          * it anyway. But there's no harm in copying it now.)
                                670                 :          */
                                671             670 :         switch (entry->source_type)
                                672                 :         {
                                673               8 :             case FILE_TYPE_DIRECTORY:
                                674                 :             case FILE_TYPE_SYMLINK:
                                675               8 :                 return FILE_ACTION_CREATE;
                                676             662 :             case FILE_TYPE_REGULAR:
                                677             662 :                 return FILE_ACTION_COPY;
  886 heikki.linnakangas        678 UBC           0 :             case FILE_TYPE_UNDEFINED:
                                679               0 :                 pg_fatal("unknown file type for \"%s\"", entry->path);
                                680                 :                 break;
                                681                 :         }
                                682                 :     }
  886 heikki.linnakangas        683 CBC       14801 :     else if (entry->target_exists && !entry->source_exists)
                                684                 :     {
                                685                 :         /* File exists in target, but not source. Remove it. */
                                686             641 :         return FILE_ACTION_REMOVE;
                                687                 :     }
                                688           14160 :     else if (!entry->target_exists && !entry->source_exists)
                                689                 :     {
                                690                 :         /*
                                691                 :          * Doesn't exist in either server. Why does it have an entry in the
                                692                 :          * first place??
                                693                 :          */
  886 heikki.linnakangas        694 UBC           0 :         Assert(false);
                                695                 :         return FILE_ACTION_NONE;
                                696                 :     }
                                697                 : 
                                698                 :     /*
                                699                 :      * Otherwise, the file exists on both systems
                                700                 :      */
  886 heikki.linnakangas        701 CBC       14160 :     Assert(entry->target_exists && entry->source_exists);
                                702                 : 
                                703           14160 :     if (entry->source_type != entry->target_type)
                                704                 :     {
                                705                 :         /* But it's a different kind of object. Strange.. */
  886 heikki.linnakangas        706 UBC           0 :         pg_fatal("file \"%s\" is of different type in source and target", entry->path);
                                707                 :     }
                                708                 : 
                                709                 :     /*
                                710                 :      * PG_VERSION files should be identical on both systems, but avoid
                                711                 :      * overwriting them for paranoia.
                                712                 :      */
  886 heikki.linnakangas        713 CBC       14160 :     if (pg_str_endswith(entry->path, "PG_VERSION"))
                                714              56 :         return FILE_ACTION_NONE;
                                715                 : 
                                716           14104 :     switch (entry->source_type)
                                717                 :     {
                                718             332 :         case FILE_TYPE_DIRECTORY:
                                719             332 :             return FILE_ACTION_NONE;
                                720                 : 
  886 heikki.linnakangas        721 UBC           0 :         case FILE_TYPE_SYMLINK:
                                722                 : 
                                723                 :             /*
                                724                 :              * XXX: Should we check if it points to the same target?
                                725                 :              */
                                726               0 :             return FILE_ACTION_NONE;
                                727                 : 
  886 heikki.linnakangas        728 CBC       13772 :         case FILE_TYPE_REGULAR:
                                729           13772 :             if (!entry->isrelfile)
                                730                 :             {
                                731                 :                 /*
                                732                 :                  * It's a non-data file that we have no special processing
                                733                 :                  * for. Copy it in toto.
                                734                 :                  */
                                735            3518 :                 return FILE_ACTION_COPY;
                                736                 :             }
                                737                 :             else
                                738                 :             {
                                739                 :                 /*
                                740                 :                  * It's a data file that exists in both systems.
                                741                 :                  *
                                742                 :                  * If it's larger in target, we can truncate it. There will
                                743                 :                  * also be a WAL record of the truncation in the source
                                744                 :                  * system, so WAL replay would eventually truncate the target
                                745                 :                  * too, but we might as well do it now.
                                746                 :                  *
                                747                 :                  * If it's smaller in the target, it means that it has been
                                748                 :                  * truncated in the target, or enlarged in the source, or
                                749                 :                  * both. If it was truncated in the target, we need to copy
                                750                 :                  * the missing tail from the source system. If it was enlarged
                                751                 :                  * in the source system, there will be WAL records in the
                                752                 :                  * source system for the new blocks, so we wouldn't need to
                                753                 :                  * copy them here. But we don't know which scenario we're
                                754                 :                  * dealing with, and there's no harm in copying the missing
                                755                 :                  * blocks now, so do it now.
                                756                 :                  *
                                757                 :                  * If it's the same size, do nothing here. Any blocks modified
                                758                 :                  * in the target will be copied based on parsing the target
                                759                 :                  * system's WAL, and any blocks modified in the source will be
                                760                 :                  * updated after rewinding, when the source system's WAL is
                                761                 :                  * replayed.
                                762                 :                  */
                                763           10254 :                 if (entry->target_size < entry->source_size)
                                764               5 :                     return FILE_ACTION_COPY_TAIL;
                                765           10249 :                 else if (entry->target_size > entry->source_size)
                                766               4 :                     return FILE_ACTION_TRUNCATE;
                                767                 :                 else
                                768           10245 :                     return FILE_ACTION_NONE;
                                769                 :             }
                                770                 :             break;
                                771                 : 
  886 heikki.linnakangas        772 UBC           0 :         case FILE_TYPE_UNDEFINED:
                                773               0 :             pg_fatal("unknown file type for \"%s\"", path);
                                774                 :             break;
                                775                 :     }
                                776                 : 
                                777                 :     /* unreachable */
                                778               0 :     pg_fatal("could not decide what to do with file \"%s\"", path);
                                779                 : }
                                780                 : 
                                781                 : /*
                                782                 :  * Decide what to do with each file.
                                783                 :  *
                                784                 :  * Returns a 'filemap' with the entries in the order that their actions
                                785                 :  * should be executed.
                                786                 :  */
                                787                 : filemap_t *
  886 heikki.linnakangas        788 CBC          13 : decide_file_actions(void)
                                789                 : {
                                790                 :     int         i;
                                791                 :     filehash_iterator it;
                                792                 :     file_entry_t *entry;
                                793                 :     filemap_t  *filemap;
                                794                 : 
                                795              13 :     filehash_start_iterate(filehash, &it);
                                796           15589 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
                                797                 :     {
                                798           15576 :         entry->action = decide_file_action(entry);
                                799                 :     }
                                800                 : 
                                801                 :     /*
                                802                 :      * Turn the hash table into an array, and sort in the order that the
                                803                 :      * actions should be performed.
                                804                 :      */
                                805              13 :     filemap = pg_malloc(offsetof(filemap_t, entries) +
                                806              13 :                         filehash->members * sizeof(file_entry_t *));
                                807              13 :     filemap->nentries = filehash->members;
                                808              13 :     filehash_start_iterate(filehash, &it);
                                809              13 :     i = 0;
                                810           15589 :     while ((entry = filehash_iterate(filehash, &it)) != NULL)
                                811                 :     {
                                812           15576 :         filemap->entries[i++] = entry;
                                813                 :     }
                                814                 : 
                                815              13 :     qsort(&filemap->entries, filemap->nentries, sizeof(file_entry_t *),
                                816                 :           final_filemap_cmp);
                                817                 : 
                                818              13 :     return filemap;
                                819                 : }
                                820                 : 
                                821                 : 
                                822                 : /*
                                823                 :  * Helper function for filemap hash table.
                                824                 :  */
                                825                 : static uint32
                                826          133990 : hash_string_pointer(const char *s)
                                827                 : {
                                828          133990 :     unsigned char *ss = (unsigned char *) s;
                                829                 : 
                                830          133990 :     return hash_bytes(ss, strlen(s));
                                831                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a