LCOV - differential code coverage report
Current view: top level - src/backend/access/transam - xlogutils.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 75.7 % 243 184 10 8 29 12 7 115 24 38 35 122 5 16
Current Date: 2023-04-08 17:13:01 Functions: 90.9 % 22 20 1 1 14 6 1 18 1 2
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 1 1 1
Legend: Lines: hit not hit (60,120] days: 66.7 % 3 2 1 2
(180,240] days: 66.7 % 3 2 1 2 1
(240..) days: 75.8 % 236 179 9 7 29 12 7 115 19 38 34 122
Function coverage date bins:
(240..) days: 48.8 % 41 20 1 1 14 6 1 18

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * xlogutils.c
                                  4                 :  *
                                  5                 :  * PostgreSQL write-ahead log manager utility routines
                                  6                 :  *
                                  7                 :  * This file contains support routines that are used by XLOG replay functions.
                                  8                 :  * None of this code is used during normal system operation.
                                  9                 :  *
                                 10                 :  *
                                 11                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 12                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 13                 :  *
                                 14                 :  * src/backend/access/transam/xlogutils.c
                                 15                 :  *
                                 16                 :  *-------------------------------------------------------------------------
                                 17                 :  */
                                 18                 : #include "postgres.h"
                                 19                 : 
                                 20                 : #include <unistd.h>
                                 21                 : 
                                 22                 : #include "access/timeline.h"
                                 23                 : #include "access/xlogrecovery.h"
                                 24                 : #include "access/xlog_internal.h"
                                 25                 : #include "access/xlogprefetcher.h"
                                 26                 : #include "access/xlogutils.h"
                                 27                 : #include "miscadmin.h"
                                 28                 : #include "pgstat.h"
                                 29                 : #include "storage/fd.h"
                                 30                 : #include "storage/smgr.h"
                                 31                 : #include "utils/guc.h"
                                 32                 : #include "utils/hsearch.h"
                                 33                 : #include "utils/rel.h"
                                 34                 : 
                                 35                 : 
                                 36                 : /* GUC variable */
                                 37                 : bool        ignore_invalid_pages = false;
                                 38                 : 
                                 39                 : /*
                                 40                 :  * Are we doing recovery from XLOG?
                                 41                 :  *
                                 42                 :  * This is only ever true in the startup process; it should be read as meaning
                                 43                 :  * "this process is replaying WAL records", rather than "the system is in
                                 44                 :  * recovery mode".  It should be examined primarily by functions that need
                                 45                 :  * to act differently when called from a WAL redo function (e.g., to skip WAL
                                 46                 :  * logging).  To check whether the system is in recovery regardless of which
                                 47                 :  * process you're running in, use RecoveryInProgress() but only after shared
                                 48                 :  * memory startup and lock initialization.
                                 49                 :  *
                                 50                 :  * This is updated from xlog.c and xlogrecovery.c, but lives here because
                                 51                 :  * it's mostly read by WAL redo functions.
                                 52                 :  */
                                 53                 : bool        InRecovery = false;
                                 54                 : 
                                 55                 : /* Are we in Hot Standby mode? Only valid in startup process, see xlogutils.h */
                                 56                 : HotStandbyState standbyState = STANDBY_DISABLED;
                                 57                 : 
                                 58                 : /*
                                 59                 :  * During XLOG replay, we may see XLOG records for incremental updates of
                                 60                 :  * pages that no longer exist, because their relation was later dropped or
                                 61                 :  * truncated.  (Note: this is only possible when full_page_writes = OFF,
                                 62                 :  * since when it's ON, the first reference we see to a page should always
                                 63                 :  * be a full-page rewrite not an incremental update.)  Rather than simply
                                 64                 :  * ignoring such records, we make a note of the referenced page, and then
                                 65                 :  * complain if we don't actually see a drop or truncate covering the page
                                 66                 :  * later in replay.
                                 67                 :  */
                                 68                 : typedef struct xl_invalid_page_key
                                 69                 : {
                                 70                 :     RelFileLocator locator;     /* the relation */
                                 71                 :     ForkNumber  forkno;         /* the fork number */
                                 72                 :     BlockNumber blkno;          /* the page */
                                 73                 : } xl_invalid_page_key;
                                 74                 : 
                                 75                 : typedef struct xl_invalid_page
                                 76                 : {
                                 77                 :     xl_invalid_page_key key;    /* hash key ... must be first */
                                 78                 :     bool        present;        /* page existed but contained zeroes */
                                 79                 : } xl_invalid_page;
                                 80                 : 
                                 81                 : static HTAB *invalid_page_tab = NULL;
                                 82                 : 
                                 83                 : static int  read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                 84                 :                                       int reqLen, XLogRecPtr targetRecPtr,
                                 85                 :                                       char *cur_page, bool wait_for_wal);
                                 86                 : 
                                 87                 : /* Report a reference to an invalid page */
                                 88                 : static void
  277 rhaas                      89 UNC           0 : report_invalid_page(int elevel, RelFileLocator locator, ForkNumber forkno,
                                 90                 :                     BlockNumber blkno, bool present)
                                 91                 : {
                                 92               0 :     char       *path = relpathperm(locator, forkno);
                                 93                 : 
 4146 heikki.linnakangas         94 UBC           0 :     if (present)
                                 95               0 :         elog(elevel, "page %u of relation %s is uninitialized",
                                 96                 :              blkno, path);
                                 97                 :     else
                                 98               0 :         elog(elevel, "page %u of relation %s does not exist",
                                 99                 :              blkno, path);
                                100               0 :     pfree(path);
                                101               0 : }
                                102                 : 
                                103                 : /* Log a reference to an invalid page */
                                104                 : static void
  277 rhaas                     105 GNC          45 : log_invalid_page(RelFileLocator locator, ForkNumber forkno, BlockNumber blkno,
                                106                 :                  bool present)
                                107                 : {
                                108                 :     xl_invalid_page_key key;
                                109                 :     xl_invalid_page *hentry;
                                110                 :     bool        found;
                                111                 : 
                                112                 :     /*
                                113                 :      * Once recovery has reached a consistent state, the invalid-page table
                                114                 :      * should be empty and remain so. If a reference to an invalid page is
                                115                 :      * found after consistency is reached, PANIC immediately. This might seem
                                116                 :      * aggressive, but it's better than letting the invalid reference linger
                                117                 :      * in the hash table until the end of recovery and PANIC there, which
                                118                 :      * might come only much later if this is a standby server.
                                119                 :      */
 4139 heikki.linnakangas        120 CBC          45 :     if (reachedConsistency)
                                121                 :     {
  277 rhaas                     122 UNC           0 :         report_invalid_page(WARNING, locator, forkno, blkno, present);
 1173 fujii                     123 UBC           0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
                                124                 :              "WAL contains references to invalid pages");
                                125                 :     }
                                126                 : 
                                127                 :     /*
                                128                 :      * Log references to invalid pages at DEBUG1 level.  This allows some
                                129                 :      * tracing of the cause (note the elog context mechanism will tell us
                                130                 :      * something about the XLOG record that generated the reference).
                                131                 :      */
  867 tgl                       132 CBC          45 :     if (message_level_is_interesting(DEBUG1))
  277 rhaas                     133 UNC           0 :         report_invalid_page(DEBUG1, locator, forkno, blkno, present);
                                134                 : 
 6204 tgl                       135 CBC          45 :     if (invalid_page_tab == NULL)
                                136                 :     {
                                137                 :         /* create hash table when first needed */
                                138                 :         HASHCTL     ctl;
                                139                 : 
                                140               1 :         ctl.keysize = sizeof(xl_invalid_page_key);
                                141               1 :         ctl.entrysize = sizeof(xl_invalid_page);
                                142                 : 
                                143               1 :         invalid_page_tab = hash_create("XLOG invalid-page table",
                                144                 :                                        100,
                                145                 :                                        &ctl,
                                146                 :                                        HASH_ELEM | HASH_BLOBS);
                                147                 :     }
                                148                 : 
                                149                 :     /* we currently assume xl_invalid_page_key contains no padding */
  277 rhaas                     150 GNC          45 :     key.locator = locator;
 5354 heikki.linnakangas        151 CBC          45 :     key.forkno = forkno;
 6204 tgl                       152              45 :     key.blkno = blkno;
                                153                 :     hentry = (xl_invalid_page *)
   62 peter                     154 GNC          45 :         hash_search(invalid_page_tab, &key, HASH_ENTER, &found);
                                155                 : 
 6204 tgl                       156 CBC          45 :     if (!found)
                                157                 :     {
                                158                 :         /* hash_search already filled in the key */
                                159              45 :         hentry->present = present;
                                160                 :     }
                                161                 :     else
                                162                 :     {
                                163                 :         /* repeat reference ... leave "present" as it was */
                                164                 :     }
                                165              45 : }
                                166                 : 
                                167                 : /* Forget any invalid pages >= minblkno, because they've been dropped */
                                168                 : static void
  277 rhaas                     169 GNC       24406 : forget_invalid_pages(RelFileLocator locator, ForkNumber forkno,
                                170                 :                      BlockNumber minblkno)
                                171                 : {
                                172                 :     HASH_SEQ_STATUS status;
                                173                 :     xl_invalid_page *hentry;
                                174                 : 
 6204 tgl                       175 GIC       24406 :     if (invalid_page_tab == NULL)
 6204 tgl                       176 CBC       24390 :         return;                 /* nothing to do */
 6204 tgl                       177 ECB             : 
 6204 tgl                       178 GIC          16 :     hash_seq_init(&status, invalid_page_tab);
 6204 tgl                       179 ECB             : 
 6204 tgl                       180 GIC         601 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
 6204 tgl                       181 ECB             :     {
  277 rhaas                     182 GNC         585 :         if (RelFileLocatorEquals(hentry->key.locator, locator) &&
 5354 heikki.linnakangas        183 CBC          45 :             hentry->key.forkno == forkno &&
 6204 tgl                       184              45 :             hentry->key.blkno >= minblkno)
 6204 tgl                       185 ECB             :         {
  867 tgl                       186 GIC          45 :             if (message_level_is_interesting(DEBUG2))
 5262 heikki.linnakangas        187 ECB             :             {
  277 rhaas                     188 UNC           0 :                 char       *path = relpathperm(hentry->key.locator, forkno);
 5050 bruce                     189 EUB             : 
 5262 heikki.linnakangas        190 UIC           0 :                 elog(DEBUG2, "page %u of relation %s has been dropped",
 5262 heikki.linnakangas        191 EUB             :                      hentry->key.blkno, path);
 5262 heikki.linnakangas        192 UIC           0 :                 pfree(path);
 5262 heikki.linnakangas        193 EUB             :             }
                                194                 : 
 6204 tgl                       195 GIC          45 :             if (hash_search(invalid_page_tab,
   62 peter                     196 GNC          45 :                             &hentry->key,
 6204 tgl                       197 ECB             :                             HASH_REMOVE, NULL) == NULL)
 6204 tgl                       198 UIC           0 :                 elog(ERROR, "hash table corrupted");
 6204 tgl                       199 EUB             :         }
                                200                 :     }
                                201                 : }
                                202                 : 
                                203                 : /* Forget any invalid pages in a whole database */
                                204                 : static void
 6204 tgl                       205 GIC           9 : forget_invalid_pages_db(Oid dbid)
 6204 tgl                       206 ECB             : {
                                207                 :     HASH_SEQ_STATUS status;
                                208                 :     xl_invalid_page *hentry;
                                209                 : 
 6204 tgl                       210 GIC           9 :     if (invalid_page_tab == NULL)
 6204 tgl                       211 CBC           9 :         return;                 /* nothing to do */
 6204 tgl                       212 ECB             : 
 6204 tgl                       213 UIC           0 :     hash_seq_init(&status, invalid_page_tab);
 6204 tgl                       214 EUB             : 
 6204 tgl                       215 UIC           0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
 6204 tgl                       216 EUB             :     {
  277 rhaas                     217 UNC           0 :         if (hentry->key.locator.dbOid == dbid)
 6204 tgl                       218 EUB             :         {
  867 tgl                       219 UIC           0 :             if (message_level_is_interesting(DEBUG2))
 5262 heikki.linnakangas        220 EUB             :             {
  277 rhaas                     221 UNC           0 :                 char       *path = relpathperm(hentry->key.locator, hentry->key.forkno);
 5050 bruce                     222 EUB             : 
 5262 heikki.linnakangas        223 UIC           0 :                 elog(DEBUG2, "page %u of relation %s has been dropped",
 5262 heikki.linnakangas        224 EUB             :                      hentry->key.blkno, path);
 5262 heikki.linnakangas        225 UIC           0 :                 pfree(path);
 5262 heikki.linnakangas        226 EUB             :             }
                                227                 : 
 6204 tgl                       228 UIC           0 :             if (hash_search(invalid_page_tab,
   62 peter                     229 UNC           0 :                             &hentry->key,
 6204 tgl                       230 EUB             :                             HASH_REMOVE, NULL) == NULL)
 6204 tgl                       231 UIC           0 :                 elog(ERROR, "hash table corrupted");
 6204 tgl                       232 EUB             :         }
                                233                 :     }
                                234                 : }
                                235                 : 
                                236                 : /* Are there any unresolved references to invalid pages? */
                                237                 : bool
 4146 heikki.linnakangas        238 GIC         166 : XLogHaveInvalidPages(void)
 4146 heikki.linnakangas        239 ECB             : {
 4146 heikki.linnakangas        240 GIC         166 :     if (invalid_page_tab != NULL &&
 4146 heikki.linnakangas        241 LBC           0 :         hash_get_num_entries(invalid_page_tab) > 0)
 4146 heikki.linnakangas        242 UBC           0 :         return true;
 4146 heikki.linnakangas        243 GBC         166 :     return false;
 4146 heikki.linnakangas        244 ECB             : }
                                245                 : 
                                246                 : /* Complain about any remaining invalid-page entries */
                                247                 : void
 6204 tgl                       248 GIC          75 : XLogCheckInvalidPages(void)
 6204 tgl                       249 ECB             : {
                                250                 :     HASH_SEQ_STATUS status;
                                251                 :     xl_invalid_page *hentry;
 6204 tgl                       252 GIC          75 :     bool        foundone = false;
 6204 tgl                       253 ECB             : 
 6204 tgl                       254 GIC          75 :     if (invalid_page_tab == NULL)
 6204 tgl                       255 CBC          75 :         return;                 /* nothing to do */
 6204 tgl                       256 ECB             : 
 6204 tgl                       257 UIC           0 :     hash_seq_init(&status, invalid_page_tab);
 6204 tgl                       258 EUB             : 
                                259                 :     /*
                                260                 :      * Our strategy is to emit WARNING messages for all remaining entries and
                                261                 :      * only PANIC after we've dumped all the available info.
                                262                 :      */
 6204 tgl                       263 UIC           0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
 6204 tgl                       264 EUB             :     {
  277 rhaas                     265 UNC           0 :         report_invalid_page(WARNING, hentry->key.locator, hentry->key.forkno,
 4146 heikki.linnakangas        266 UBC           0 :                             hentry->key.blkno, hentry->present);
 6204 tgl                       267               0 :         foundone = true;
 6204 tgl                       268 EUB             :     }
                                269                 : 
 6204 tgl                       270 UIC           0 :     if (foundone)
 1173 fujii                     271 UBC           0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
 1173 fujii                     272 EUB             :              "WAL contains references to invalid pages");
                                273                 : 
 5414 heikki.linnakangas        274 UIC           0 :     hash_destroy(invalid_page_tab);
 5414 heikki.linnakangas        275 UBC           0 :     invalid_page_tab = NULL;
 6204 tgl                       276 EUB             : }
                                277                 : 
                                278                 : 
                                279                 : /*
                                280                 :  * XLogReadBufferForRedo
                                281                 :  *      Read a page during XLOG replay
                                282                 :  *
                                283                 :  * Reads a block referenced by a WAL record into shared buffer cache, and
                                284                 :  * determines what needs to be done to redo the changes to it.  If the WAL
                                285                 :  * record includes a full-page image of the page, it is restored.
                                286                 :  *
                                287                 :  * 'record.EndRecPtr' is compared to the page's LSN to determine if the record
                                288                 :  * has already been replayed.  'block_id' is the ID number the block was
                                289                 :  * registered with, when the WAL record was created.
                                290                 :  *
                                291                 :  * Returns one of the following:
                                292                 :  *
                                293                 :  *  BLK_NEEDS_REDO  - changes from the WAL record need to be applied
                                294                 :  *  BLK_DONE        - block doesn't need replaying
                                295                 :  *  BLK_RESTORED    - block was restored from a full-page image included in
                                296                 :  *                    the record
                                297                 :  *  BLK_NOTFOUND    - block was not found (because it was truncated away by
                                298                 :  *                    an operation later in the WAL stream)
                                299                 :  *
                                300                 :  * On return, the buffer is locked in exclusive-mode, and returned in *buf.
                                301                 :  * Note that the buffer is locked and returned even if it doesn't need
                                302                 :  * replaying.  (Getting the buffer lock is not really necessary during
                                303                 :  * single-process crash recovery, but some subroutines such as MarkBufferDirty
                                304                 :  * will complain if we don't have the lock.  In hot standby mode it's
                                305                 :  * definitely necessary.)
                                306                 :  *
                                307                 :  * Note: when a backup block is available in XLOG with the BKPIMAGE_APPLY flag
                                308                 :  * set, we restore it, even if the page in the database appears newer.  This
                                309                 :  * is to protect ourselves against database pages that were partially or
                                310                 :  * incorrectly written during a crash.  We assume that the XLOG data must be
                                311                 :  * good because it has passed a CRC check, while the database page might not
                                312                 :  * be.  This will force us to replay all subsequent modifications of the page
                                313                 :  * that appear in XLOG, rather than possibly ignoring them as already
                                314                 :  * applied, but that's not a huge drawback.
                                315                 :  */
                                316                 : XLogRedoAction
 3062 heikki.linnakangas        317 GIC     2591739 : XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
 3161 heikki.linnakangas        318 ECB             :                       Buffer *buf)
                                319                 : {
 3062 heikki.linnakangas        320 GIC     2591739 :     return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
 3062 heikki.linnakangas        321 ECB             :                                          false, buf);
                                322                 : }
                                323                 : 
                                324                 : /*
                                325                 :  * Pin and lock a buffer referenced by a WAL record, for the purpose of
                                326                 :  * re-initializing it.
                                327                 :  */
                                328                 : Buffer
 3062 heikki.linnakangas        329 GIC       50028 : XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
 3062 heikki.linnakangas        330 ECB             : {
                                331                 :     Buffer      buf;
                                332                 : 
 3062 heikki.linnakangas        333 GIC       50028 :     XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
 3062 heikki.linnakangas        334 ECB             :                                   &buf);
 3062 heikki.linnakangas        335 GIC       50028 :     return buf;
 3161 heikki.linnakangas        336 ECB             : }
                                337                 : 
                                338                 : /*
                                339                 :  * XLogReadBufferForRedoExtended
                                340                 :  *      Like XLogReadBufferForRedo, but with extra options.
                                341                 :  *
                                342                 :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
                                343                 :  * with all-zeroes pages up to the referenced block number.  In
                                344                 :  * RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK modes, the return value
                                345                 :  * is always BLK_NEEDS_REDO.
                                346                 :  *
                                347                 :  * (The RBM_ZERO_AND_CLEANUP_LOCK mode is redundant with the get_cleanup_lock
                                348                 :  * parameter. Do not use an inconsistent combination!)
                                349                 :  *
                                350                 :  * If 'get_cleanup_lock' is true, a "cleanup lock" is acquired on the buffer
                                351                 :  * using LockBufferForCleanup(), instead of a regular exclusive lock.
                                352                 :  */
                                353                 : XLogRedoAction
 3062 heikki.linnakangas        354 GIC     2654768 : XLogReadBufferForRedoExtended(XLogReaderState *record,
 3062 heikki.linnakangas        355 ECB             :                               uint8 block_id,
                                356                 :                               ReadBufferMode mode, bool get_cleanup_lock,
                                357                 :                               Buffer *buf)
                                358                 : {
 3062 heikki.linnakangas        359 GIC     2654768 :     XLogRecPtr  lsn = record->EndRecPtr;
                                360                 :     RelFileLocator rlocator;
                                361                 :     ForkNumber  forknum;
                                362                 :     BlockNumber blkno;
                                363                 :     Buffer      prefetch_buffer;
                                364                 :     Page        page;
                                365                 :     bool        zeromode;
                                366                 :     bool        willinit;
                                367                 : 
  277 rhaas                     368 GNC     2654768 :     if (!XLogRecGetBlockTagExtended(record, block_id, &rlocator, &forknum, &blkno,
  367 tmunro                    369 ECB             :                                     &prefetch_buffer))
                                370                 :     {
                                371                 :         /* Caller specified a bogus block_id */
  363 tgl                       372 UIC           0 :         elog(PANIC, "failed to locate backup block with ID %d in WAL record",
  363 tgl                       373 EUB             :              block_id);
                                374                 :     }
                                375                 : 
                                376                 :     /*
                                377                 :      * Make sure that if the block is marked with WILL_INIT, the caller is
                                378                 :      * going to initialize it. And vice versa.
                                379                 :      */
 2820 heikki.linnakangas        380 GIC     2654768 :     zeromode = (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
  387 tmunro                    381 CBC     2654768 :     willinit = (XLogRecGetBlock(record, block_id)->flags & BKPBLOCK_WILL_INIT) != 0;
 2820 heikki.linnakangas        382         2654768 :     if (willinit && !zeromode)
 2820 heikki.linnakangas        383 LBC           0 :         elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
 2820 heikki.linnakangas        384 GBC     2654768 :     if (!willinit && zeromode)
 2820 heikki.linnakangas        385 LBC           0 :         elog(PANIC, "block to be initialized in redo routine must be marked with WILL_INIT flag in the WAL record");
 2820 heikki.linnakangas        386 EUB             : 
                                387                 :     /* If it has a full-page image and it should be restored, do it. */
 2251 rhaas                     388 GIC     2654768 :     if (XLogRecBlockImageApply(record, block_id))
 3161 heikki.linnakangas        389 ECB             :     {
 2251 rhaas                     390 GIC       36507 :         Assert(XLogRecHasBlockImage(record, block_id));
  277 rhaas                     391 GNC       36507 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno,
  367 tmunro                    392 ECB             :                                       get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK,
                                393                 :                                       prefetch_buffer);
 2545 kgrittn                   394 GIC       36507 :         page = BufferGetPage(*buf);
 3062 heikki.linnakangas        395 CBC       36507 :         if (!RestoreBlockImage(record, block_id, page))
  212 michael                   396 LBC           0 :             ereport(ERROR,
  212 michael                   397 EUB             :                     (errcode(ERRCODE_INTERNAL_ERROR),
                                398                 :                      errmsg_internal("%s", record->errormsg_buf)));
                                399                 : 
                                400                 :         /*
                                401                 :          * The page may be uninitialized. If so, we can't set the LSN because
                                402                 :          * that would corrupt the page.
                                403                 :          */
 3062 heikki.linnakangas        404 GIC       36507 :         if (!PageIsNew(page))
 3062 heikki.linnakangas        405 ECB             :         {
 3062 heikki.linnakangas        406 GIC       36492 :             PageSetLSN(page, lsn);
 3062 heikki.linnakangas        407 ECB             :         }
                                408                 : 
 3062 heikki.linnakangas        409 GIC       36507 :         MarkBufferDirty(*buf);
 3062 heikki.linnakangas        410 ECB             : 
                                411                 :         /*
                                412                 :          * At the end of crash recovery the init forks of unlogged relations
                                413                 :          * are copied, without going through shared buffers. So we need to
                                414                 :          * force the on-disk state of init forks to always be in sync with the
                                415                 :          * state in shared buffers.
                                416                 :          */
 2677 andres                    417 GIC       36507 :         if (forknum == INIT_FORKNUM)
 2677 andres                    418 CBC          22 :             FlushOneBuffer(*buf);
 2677 andres                    419 ECB             : 
 3161 heikki.linnakangas        420 GIC       36507 :         return BLK_RESTORED;
 3161 heikki.linnakangas        421 ECB             :     }
                                422                 :     else
                                423                 :     {
  277 rhaas                     424 GNC     2618261 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno, mode, prefetch_buffer);
 3161 heikki.linnakangas        425 CBC     2618261 :         if (BufferIsValid(*buf))
 3161 heikki.linnakangas        426 ECB             :         {
 3069 heikki.linnakangas        427 GIC     2618216 :             if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
 3069 heikki.linnakangas        428 ECB             :             {
 3069 heikki.linnakangas        429 GIC     2567967 :                 if (get_cleanup_lock)
 3069 heikki.linnakangas        430 CBC        7003 :                     LockBufferForCleanup(*buf);
 3069 heikki.linnakangas        431 ECB             :                 else
 3069 heikki.linnakangas        432 GIC     2560964 :                     LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
 3069 heikki.linnakangas        433 ECB             :             }
 2545 kgrittn                   434 GIC     2618216 :             if (lsn <= PageGetLSN(BufferGetPage(*buf)))
 3161 heikki.linnakangas        435 LBC           0 :                 return BLK_DONE;
 3161 heikki.linnakangas        436 EUB             :             else
 3161 heikki.linnakangas        437 GIC     2618216 :                 return BLK_NEEDS_REDO;
 3161 heikki.linnakangas        438 ECB             :         }
                                439                 :         else
 3161 heikki.linnakangas        440 GIC          45 :             return BLK_NOTFOUND;
 3161 heikki.linnakangas        441 ECB             :     }
                                442                 : }
                                443                 : 
                                444                 : /*
                                445                 :  * XLogReadBufferExtended
                                446                 :  *      Read a page during XLOG replay
                                447                 :  *
                                448                 :  * This is functionally comparable to ReadBufferExtended. There's some
                                449                 :  * differences in the behavior wrt. the "mode" argument:
                                450                 :  *
                                451                 :  * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we
                                452                 :  * return InvalidBuffer. In this case the caller should silently skip the
                                453                 :  * update on this page. (In this situation, we expect that the page was later
                                454                 :  * dropped or truncated. If we don't see evidence of that later in the WAL
                                455                 :  * sequence, we'll complain at the end of WAL replay.)
                                456                 :  *
                                457                 :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
                                458                 :  * with all-zeroes pages up to the given block number.
                                459                 :  *
                                460                 :  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
                                461                 :  * exist, and we don't check for all-zeroes.  Thus, no log entry is made
                                462                 :  * to imply that the page should be dropped or truncated later.
                                463                 :  *
                                464                 :  * Optionally, recent_buffer can be used to provide a hint about the location
                                465                 :  * of the page in the buffer pool; it does not have to be correct, but avoids
                                466                 :  * a buffer mapping table probe if it is.
                                467                 :  *
                                468                 :  * NB: A redo function should normally not call this directly. To get a page
                                469                 :  * to modify, use XLogReadBufferForRedoExtended instead. It is important that
                                470                 :  * all pages modified by a WAL record are registered in the WAL records, or
                                471                 :  * they will be invisible to tools that need to know which pages are modified.
                                472                 :  */
                                473                 : Buffer
  277 rhaas                     474 GNC     2926216 : XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum,
  367 tmunro                    475 ECB             :                        BlockNumber blkno, ReadBufferMode mode,
                                476                 :                        Buffer recent_buffer)
                                477                 : {
                                478                 :     BlockNumber lastblock;
                                479                 :     Buffer      buffer;
                                480                 :     SMgrRelation smgr;
                                481                 : 
 6220 tgl                       482 GIC     2926216 :     Assert(blkno != P_NEW);
 6220 tgl                       483 ECB             : 
                                484                 :     /* Do we have a clue where the buffer might be already? */
  367 tmunro                    485 GIC     2926216 :     if (BufferIsValid(recent_buffer) &&
  367 tmunro                    486 CBC      423615 :         mode == RBM_NORMAL &&
  277 rhaas                     487 GNC      423615 :         ReadRecentBuffer(rlocator, forknum, blkno, recent_buffer))
  367 tmunro                    488 ECB             :     {
  367 tmunro                    489 GIC      422006 :         buffer = recent_buffer;
  367 tmunro                    490 CBC      422006 :         goto recent_buffer_fast_path;
  367 tmunro                    491 ECB             :     }
                                492                 : 
                                493                 :     /* Open the relation at smgr level */
  277 rhaas                     494 GNC     2504210 :     smgr = smgropen(rlocator, InvalidBackendId);
 5414 heikki.linnakangas        495 ECB             : 
                                496                 :     /*
                                497                 :      * Create the target file if it doesn't already exist.  This lets us cope
                                498                 :      * if the replay sequence contains writes to a relation that is later
                                499                 :      * deleted.  (The original coding of this routine would instead suppress
                                500                 :      * the writes, but that seems like it risks losing valuable data if the
                                501                 :      * filesystem loses an inode during a crash.  Better to write the data
                                502                 :      * until we are actually told to delete the file.)
                                503                 :      */
 5254 heikki.linnakangas        504 GIC     2504210 :     smgrcreate(smgr, forknum, true);
 5414 heikki.linnakangas        505 ECB             : 
 5354 heikki.linnakangas        506 GIC     2504210 :     lastblock = smgrnblocks(smgr, forknum);
 5414 heikki.linnakangas        507 ECB             : 
 6220 tgl                       508 GIC     2504210 :     if (blkno < lastblock)
 8206 vadim4o                   509 ECB             :     {
                                510                 :         /* page exists in file */
  277 rhaas                     511 GNC     2467792 :         buffer = ReadBufferWithoutRelcache(rlocator, forknum, blkno,
  376 rhaas                     512 ECB             :                                            mode, NULL, true);
                                513                 :     }
                                514                 :     else
                                515                 :     {
                                516                 :         /* hm, page doesn't exist in file */
 5273 heikki.linnakangas        517 GIC       36418 :         if (mode == RBM_NORMAL)
 6204 tgl                       518 ECB             :         {
  277 rhaas                     519 GNC          45 :             log_invalid_page(rlocator, forknum, blkno, false);
 6204 tgl                       520 CBC          45 :             return InvalidBuffer;
 6204 tgl                       521 ECB             :         }
 3372 tgl                       522 GIC       36373 :         if (mode == RBM_NORMAL_NO_LOG)
 3372 tgl                       523 LBC           0 :             return InvalidBuffer;
 6220 tgl                       524 EUB             :         /* OK to extend the file */
                                525                 :         /* we do this in recovery only - no rel-extension lock needed */
 6220 tgl                       526 GIC       36373 :         Assert(InRecovery);
    3 andres                    527 GNC       36373 :         buffer = ExtendBufferedRelTo(EB_SMGR(smgr, RELPERSISTENCE_PERMANENT),
                                528                 :                                      forknum,
                                529                 :                                      NULL,
                                530                 :                                      EB_PERFORMING_RECOVERY |
                                531                 :                                      EB_SKIP_EXTENSION_LOCK,
                                532                 :                                      blkno + 1,
                                533                 :                                      mode);
                                534                 :     }
 6220 tgl                       535 EUB             : 
  367 tmunro                    536 GBC     2926171 : recent_buffer_fast_path:
 5273 heikki.linnakangas        537         2926171 :     if (mode == RBM_NORMAL)
                                538                 :     {
                                539                 :         /* check that page has been initialized */
 2545 kgrittn                   540 GIC     2564524 :         Page        page = (Page) BufferGetPage(buffer);
 6220 tgl                       541 ECB             : 
                                542                 :         /*
                                543                 :          * We assume that PageIsNew is safe without a lock. During recovery,
                                544                 :          * there should be no other backends that could modify the buffer at
                                545                 :          * the same time.
                                546                 :          */
 5383 tgl                       547 GIC     2564524 :         if (PageIsNew(page))
                                548                 :         {
 5192 heikki.linnakangas        549 UIC           0 :             ReleaseBuffer(buffer);
  277 rhaas                     550 UNC           0 :             log_invalid_page(rlocator, forknum, blkno, true);
 6204 tgl                       551 UIC           0 :             return InvalidBuffer;
                                552                 :         }
                                553                 :     }
                                554                 : 
 6297 neilc                     555 GIC     2926171 :     return buffer;
                                556                 : }
                                557                 : 
                                558                 : /*
                                559                 :  * Struct actually returned by CreateFakeRelcacheEntry, though the declared
                                560                 :  * return type is Relation.
                                561                 :  */
                                562                 : typedef struct
                                563                 : {
                                564                 :     RelationData reldata;       /* Note: this must be first */
                                565                 :     FormData_pg_class pgc;
                                566                 : } FakeRelCacheEntryData;
                                567                 : 
                                568                 : typedef FakeRelCacheEntryData *FakeRelCacheEntry;
                                569                 : 
                                570                 : /*
 5414 heikki.linnakangas        571 ECB             :  * Create a fake relation cache entry for a physical relation
                                572                 :  *
                                573                 :  * It's often convenient to use the same functions in XLOG replay as in the
                                574                 :  * main codepath, but those functions typically work with a relcache entry.
                                575                 :  * We don't have a working relation cache during XLOG replay, but this
                                576                 :  * function can be used to create a fake relcache entry instead. Only the
 5050 bruce                     577                 :  * fields related to physical storage, like rd_rel, are initialized, so the
 5414 heikki.linnakangas        578                 :  * fake entry is only usable in low-level operations like ReadBuffer().
                                579                 :  *
 1100 noah                      580                 :  * This is also used for syncing WAL-skipped files.
                                581                 :  *
                                582                 :  * Caller must free the returned entry with FreeFakeRelcacheEntry().
                                583                 :  */
                                584                 : Relation
  277 rhaas                     585 GNC       36169 : CreateFakeRelcacheEntry(RelFileLocator rlocator)
                                586                 : {
 5414 heikki.linnakangas        587 ECB             :     FakeRelCacheEntry fakeentry;
                                588                 :     Relation    rel;
                                589                 : 
                                590                 :     /* Allocate the Relation struct and all related space in one block. */
 5414 heikki.linnakangas        591 GIC       36169 :     fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
                                592           36169 :     rel = (Relation) fakeentry;
 8198 vadim4o                   593 ECB             : 
 5414 heikki.linnakangas        594 GIC       36169 :     rel->rd_rel = &fakeentry->pgc;
  277 rhaas                     595 GNC       36169 :     rel->rd_locator = rlocator;
                                596                 : 
                                597                 :     /*
                                598                 :      * We will never be working with temp rels during recovery or while
                                599                 :      * syncing WAL-skipped files.
                                600                 :      */
 4605 tgl                       601 GIC       36169 :     rel->rd_backend = InvalidBackendId;
 7856 tgl                       602 ECB             : 
 1100 noah                      603                 :     /* It must be a permanent table here */
 3859 rhaas                     604 GIC       36169 :     rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
 3859 rhaas                     605 ECB             : 
                                606                 :     /* We don't know the name of the relation; use relfilenumber instead */
  193 rhaas                     607 GNC       36169 :     sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber);
                                608                 : 
                                609                 :     /*
                                610                 :      * We set up the lockRelId in case anything tries to lock the dummy
                                611                 :      * relation.  Note that this is fairly bogus since relNumber may be
                                612                 :      * different from the relation's OID.  It shouldn't really matter though.
                                613                 :      * In recovery, we are running by ourselves and can't have any lock
 1100 noah                      614 ECB             :      * conflicts.  While syncing, we already hold AccessExclusiveLock.
                                615                 :      */
  277 rhaas                     616 GNC       36169 :     rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;
  193                           617           36169 :     rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber;
 8198 vadim4o                   618 ECB             : 
 5414 heikki.linnakangas        619 CBC       36169 :     rel->rd_smgr = NULL;
 8198 vadim4o                   620 ECB             : 
 5414 heikki.linnakangas        621 GIC       36169 :     return rel;
                                622                 : }
                                623                 : 
                                624                 : /*
                                625                 :  * Free a fake relation cache entry.
                                626                 :  */
                                627                 : void
                                628           36169 : FreeFakeRelcacheEntry(Relation fakerel)
 8206 vadim4o                   629 ECB             : {
                                630                 :     /* make sure the fakerel is not referenced by the SmgrRelation anymore */
 3320 heikki.linnakangas        631 CBC       36169 :     if (fakerel->rd_smgr != NULL)
                                632           23827 :         smgrclearowner(&fakerel->rd_smgr, fakerel->rd_smgr);
 5414 heikki.linnakangas        633 GIC       36169 :     pfree(fakerel);
 8206 vadim4o                   634           36169 : }
                                635                 : 
                                636                 : /*
                                637                 :  * Drop a relation during XLOG replay
                                638                 :  *
                                639                 :  * This is called when the relation is about to be deleted; we need to remove
 5414 heikki.linnakangas        640 ECB             :  * any open "invalid-page" records for the relation.
                                641                 :  */
                                642                 : void
  277 rhaas                     643 GNC       24364 : XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
                                644                 : {
                                645           24364 :     forget_invalid_pages(rlocator, forknum, 0);
 6997 tgl                       646 GIC       24364 : }
                                647                 : 
 6220 tgl                       648 ECB             : /*
                                649                 :  * Drop a whole database during XLOG replay
                                650                 :  *
                                651                 :  * As above, but for DROP DATABASE instead of dropping a single rel
                                652                 :  */
                                653                 : void
 6220 tgl                       654 GIC           9 : XLogDropDatabase(Oid dbid)
                                655                 : {
                                656                 :     /*
                                657                 :      * This is unnecessarily heavy-handed, as it will close SMgrRelation
                                658                 :      * objects for other databases as well. DROP DATABASE occurs seldom enough
 5050 bruce                     659 ECB             :      * that it's not worth introducing a variant of smgrclose for just this
                                660                 :      * purpose. XXX: Or should we rather leave the smgr entries dangling?
                                661                 :      */
 5414 heikki.linnakangas        662 CBC           9 :     smgrcloseall();
 6204 tgl                       663 ECB             : 
 6204 tgl                       664 GIC           9 :     forget_invalid_pages_db(dbid);
                                665               9 : }
                                666                 : 
                                667                 : /*
                                668                 :  * Truncate a relation during XLOG replay
                                669                 :  *
                                670                 :  * We need to clean up any open "invalid-page" records for the dropped pages.
                                671                 :  */
                                672                 : void
  277 rhaas                     673 GNC          42 : XLogTruncateRelation(RelFileLocator rlocator, ForkNumber forkNum,
                                674                 :                      BlockNumber nblocks)
                                675                 : {
                                676              42 :     forget_invalid_pages(rlocator, forkNum, nblocks);
 6220 tgl                       677 GIC          42 : }
                                678                 : 
                                679                 : /*
                                680                 :  * Determine which timeline to read an xlog page from and set the
                                681                 :  * XLogReaderState's currTLI to that timeline ID.
                                682                 :  *
                                683                 :  * We care about timelines in xlogreader when we might be reading xlog
                                684                 :  * generated prior to a promotion, either if we're currently a standby in
                                685                 :  * recovery or if we're a promoted primary reading xlogs generated by the old
                                686                 :  * primary before our promotion.
                                687                 :  *
                                688                 :  * wantPage must be set to the start address of the page to read and
                                689                 :  * wantLength to the amount of the page that will be read, up to
                                690                 :  * XLOG_BLCKSZ. If the amount to be read isn't known, pass XLOG_BLCKSZ.
                                691                 :  *
                                692                 :  * The currTLI argument should be the system-wide current timeline.
                                693                 :  * Note that this may be different from state->currTLI, which is the timeline
                                694                 :  * from which the caller is currently reading previous xlog records.
                                695                 :  *
                                696                 :  * We switch to an xlog segment from the new timeline eagerly when on a
                                697                 :  * historical timeline, as soon as we reach the start of the xlog segment
                                698                 :  * containing the timeline switch.  The server copied the segment to the new
                                699                 :  * timeline so all the data up to the switch point is the same, but there's no
                                700                 :  * guarantee the old segment will still exist. It may have been deleted or
                                701                 :  * renamed with a .partial suffix so we can't necessarily keep reading from
                                702                 :  * the old TLI even though tliSwitchPoint says it's OK.
                                703                 :  *
                                704                 :  * We can't just check the timeline when we read a page on a different segment
                                705                 :  * to the last page. We could've received a timeline switch from a cascading
 2184 peter_e                   706 ECB             :  * upstream, so the current segment ends abruptly (possibly getting renamed to
                                707                 :  * .partial) and we have to switch to a new one.  Even in the middle of reading
                                708                 :  * a page we could have to dump the cached page and switch to a new TLI.
 2209 simon                     709                 :  *
                                710                 :  * Because of this, callers MAY NOT assume that currTLI is the timeline that
                                711                 :  * will be in a page's xlp_tli; the page may begin on an older timeline or we
                                712                 :  * might be reading from historical timeline data on a segment that's been
                                713                 :  * copied to a new timeline.
                                714                 :  *
                                715                 :  * The caller must also make sure it doesn't read past the current replay
                                716                 :  * position (using GetXLogReplayRecPtr) if executing in recovery, so it
                                717                 :  * doesn't fail to notice that the current timeline became historical.
                                718                 :  */
                                 719                 : void
   520 rhaas                     720 GIC       34901 : XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage,
                                 721                 :                           uint32 wantLength, TimeLineID currTLI)
                                 722                 : {
   699 tmunro                    723           34901 :     const XLogRecPtr lastReadPage = (state->seg.ws_segno *
                                 724           34901 :                                      state->segcxt.ws_segsize + state->segoff);
  2209 simon                     725 ECB             : 
  2209 simon                     726 CBC       34901 :     Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0);
  2209 simon                     727 GBC       34901 :     Assert(wantLength <= XLOG_BLCKSZ);
                                 728           34901 :     Assert(state->readLen == 0 || state->readLen <= XLOG_BLCKSZ);
   520 rhaas                     729 GIC       34901 :     Assert(currTLI != 0);
                                 730                 : 
                                 731                 :     /*
                                 732                 :      * If the desired page is currently read in and valid, we have nothing to
                                 733                 :      * do.
                                 734                 :      *
                                 735                 :      * The caller should've ensured that it didn't previously advance readOff
                                 736                 :      * past the valid limit of this timeline, so it doesn't matter if the
                                 737                 :      * current TLI has since become historical.
                                                     :      *
                                                     :      * lastReadPage is computed above as segment number times segment size
                                                     :      * plus the offset within the segment.  The test below passes only if
                                                     :      * that position equals wantPage, readLen is nonzero, and readLen covers
                                                     :      * at least Min(wantLength, XLOG_BLCKSZ - 1) bytes of the page.
                                                     :      *
                                                     :      * NOTE(review): readOff is not a name used anywhere in this function;
                                                     :      * the position checked here is derived from seg.ws_segno,
                                                     :      * segcxt.ws_segsize and segoff.  Confirm which field the sentence above
                                                     :      * is meant to refer to.
                                 738                 :      */
  2209 simon                     739           34901 :     if (lastReadPage == wantPage &&
   699 tmunro                    740            1863 :         state->readLen != 0 &&
  2153 bruce                     741 LBC           0 :         lastReadPage + state->readLen >= wantPage + Min(wantLength, XLOG_BLCKSZ - 1))
  2209 simon                     742 UIC           0 :         return;
  2209 simon                     743 ECB             : 
                                 744                 :     /*
                                 745                 :      * If we're reading from the current timeline, it hasn't become historical
                                 746                 :      * and the page we're reading is after the last page read, we can again
                                 747                 :      * just carry on. (Seeking backwards requires a check to make sure the
                                 748                 :      * older page isn't on a prior timeline).
                                 749                 :      *
                                 750                 :      * currTLI might've become historical since the caller obtained the value,
                                 751                 :      * but the caller is required not to read past the flush limit it saw at
   520 rhaas                     752                 :      * the time it looked up the timeline. There's nothing we can do about it
                                 753                 :      * if StartupXLOG() renames it to .partial concurrently.
                                                     :      *
                                                     :      * Note that the comparison below is >=, so re-requesting the page at
                                                     :      * lastReadPage itself also takes this path.
  2209 simon                     754                 :      */
   520 rhaas                     755 CBC       34901 :     if (state->currTLI == currTLI && wantPage >= lastReadPage)
  2209 simon                     756 ECB             :     {
  2209 simon                     757 CBC       33138 :         Assert(state->currTLIValidUntil == InvalidXLogRecPtr);
  2209 simon                     758 GIC       33138 :         return;
                                 759                 :     }
                                 760                 : 
                                 761                 :     /*
                                 762                 :      * If we're just reading pages from a previously validated historical
                                 763                 :      * timeline and the timeline we're reading from is valid until the end of
                                 764                 :      * the current segment we can just keep reading.
                                                     :      *
                                                     :      * This is tested by comparing segment numbers: the segment containing
                                                     :      * wantPage + wantLength must be strictly earlier than the segment
                                                     :      * containing currTLIValidUntil.  The test also requires that
                                                     :      * state->currTLI is nonzero and differs from the caller's currTLI.
                                 765                 :      */
                                 766            1763 :     if (state->currTLIValidUntil != InvalidXLogRecPtr &&
   520 rhaas                     767             570 :         state->currTLI != currTLI &&
  2209 simon                     768             570 :         state->currTLI != 0 &&
  1293 alvherre                  769             570 :         ((wantPage + wantLength) / state->segcxt.ws_segsize) <
                                 770             570 :         (state->currTLIValidUntil / state->segcxt.ws_segsize))
  2209 simon                     771             569 :         return;
                                 772                 : 
                                 773                 :     /*
                                 774                 :      * If we reach this point we're either looking up a page for random
                                 775                 :      * access, the current timeline just became historical, or we're reading
  2153 bruce                     776 ECB             :      * from a new segment containing a timeline switch. In all cases we need
                                 777                 :      * to determine the newest timeline on the segment.
                                 778                 :      *
  2209 simon                     779                 :      * If it's the current timeline we can just keep reading from here unless
                                 780                 :      * we detect a timeline switch that makes the current timeline historical.
                                 781                 :      * If it's a historical timeline we can read all the segment on the newest
                                 782                 :      * timeline because it contains all the old timelines' data too. So only
                                 783                 :      * one switch check is required.
                                 784                 :      */
                                 785                 :     {
                                 786                 :         /*
                                 787                 :          * We need to re-read the timeline history in case it's been changed
                                 788                 :          * by a promotion or replay from a cascaded replica.
                                                     :          *
                                                     :          * The list obtained here is released with list_free_deep() before
                                                     :          * this block ends.  endOfSegment is the last byte position of the
                                                     :          * segment that contains wantPage.
                                 789                 :          */
   520 rhaas                     790 GIC        1194 :         List       *timelineHistory = readTimeLineHistory(currTLI);
                                 791                 :         XLogRecPtr  endOfSegment;
  2209 simon                     792 ECB             : 
  1293 alvherre                  793 GIC        1194 :         endOfSegment = ((wantPage / state->segcxt.ws_segsize) + 1) *
                                 794            1194 :             state->segcxt.ws_segsize - 1;
  1293 alvherre                  795 CBC        1194 :         Assert(wantPage / state->segcxt.ws_segsize ==
                                 796                 :                endOfSegment / state->segcxt.ws_segsize);
  2209 simon                     797 ECB             : 
                                 798                 :         /*
                                 799                 :          * Find the timeline of the last LSN on the segment containing
                                 800                 :          * wantPage.
                                                     :          *
                                                     :          * The result of tliSwitchPoint() for that timeline is stored in
                                                     :          * currTLIValidUntil, and the call is also handed &state->nextTLI
                                                     :          * to fill in.  The Assert that follows checks that the requested
                                                     :          * range ends before currTLIValidUntil unless that value is
                                                     :          * InvalidXLogRecPtr.
                                 801                 :          */
  2209 simon                     802 GIC        1194 :         state->currTLI = tliOfPointInHistory(endOfSegment, timelineHistory);
                                 803            1194 :         state->currTLIValidUntil = tliSwitchPoint(state->currTLI, timelineHistory,
                                 804                 :                                                   &state->nextTLI);
  2209 simon                     805 ECB             : 
  2209 simon                     806 GIC        1194 :         Assert(state->currTLIValidUntil == InvalidXLogRecPtr ||
                                 807                 :                wantPage + wantLength < state->currTLIValidUntil);
  2209 simon                     808 ECB             : 
  2209 simon                     809 GIC        1194 :         list_free_deep(timelineHistory);
                                 810                 : 
  2209 simon                     811 CBC        1194 :         elog(DEBUG3, "switched to timeline %u valid until %X/%X",
  2153 bruce                     812 ECB             :              state->currTLI,
   775 peter                     813                 :              LSN_FORMAT_ARGS(state->currTLIValidUntil));
  2209 simon                     814                 :     }
                                 815                 : }
 2209 simon                     816 EUB             : 
  1066 alvherre                  817                 : /*
                                                     :  * XLogReaderRoutine->segment_open callback for local pg_wal files
                                                     :  *
                                                     :  * Builds the path of segment nextSegNo on the timeline read from tli_p
                                                     :  * with XLogFilePath(), opens it read-only through BasicOpenFile() and
                                                     :  * stores the resulting descriptor in state->seg.ws_file.  The function
                                                     :  * returns normally only when that descriptor is >= 0.
                                                     :  *
                                                     :  * Otherwise an ERROR is reported: errno == ENOENT gets the dedicated
                                                     :  * message saying the segment has already been removed, any other errno
                                                     :  * gets the generic could-not-open-file message.  The timeline pointed to
                                                     :  * by tli_p is only read here, never written.
                                                     :  */
                                 818                 : void
  1066 alvherre                  819 GIC         716 : wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
                                 820                 :                  TimeLineID *tli_p)
                                 821                 : {
  1231 alvherre                  822 GBC         716 :     TimeLineID  tli = *tli_p;
                                 823                 :     char        path[MAXPGPATH];
                                 824                 : 
  1061 alvherre                  825 GIC         716 :     XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
                                 826             716 :     state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
                                 827             716 :     if (state->seg.ws_file >= 0)
                                 828             716 :         return;
                                 829                 : 
  1231 alvherre                  830 LBC           0 :     if (errno == ENOENT)
  1231 alvherre                  831 UIC           0 :         ereport(ERROR,
  1231 alvherre                  832 ECB             :                 (errcode_for_file_access(),
                                 833                 :                  errmsg("requested WAL segment %s has already been removed",
                                 834                 :                         path)));
                                 835                 :     else
  1231 alvherre                  836 UIC           0 :         ereport(ERROR,
                                 837                 :                 (errcode_for_file_access(),
                                 838                 :                  errmsg("could not open file \"%s\": %m",
                                 839                 :                         path)));
                                 840                 : }
                                841                 : 
                                 842                 : /*
                                                     :  * stock XLogReaderRoutine->segment_close callback
                                                     :  *
                                                     :  * Closes the descriptor held in state->seg.ws_file and then resets that
                                                     :  * field to -1.
                                                     :  */
                                 843                 : void
  1066 alvherre                  844 GIC        1237 : wal_segment_close(XLogReaderState *state)
                                 845                 : {
                                 846            1237 :     close(state->seg.ws_file);
                                 847                 :     /* XXX the return value of close() is ignored; need to check errno? */
                                 848            1237 :     state->seg.ws_file = -1;
  1066 alvherre                  849 CBC        1237 : }
                                850                 : 
                                 851                 : /*
  1066 alvherre                  852 ECB             :  * XLogReaderRoutine->page_read callback for reading local xlog files
                                 853                 :  *
                                 854                 :  * Public because it would likely be very helpful for someone writing another
                                 855                 :  * output method outside walsender, e.g. in a bgworker.
                                 856                 :  *
                                 857                 :  * TODO: The walsender has its own version of this, but it relies on the
                                 858                 :  * walsender's latch being set whenever WAL is flushed. No such infrastructure
                                 859                 :  * exists for normal backends, so we have to do a check/sleep/repeat style of
                                 860                 :  * loop for now.
                                                     :  *
                                                     :  * This is a thin wrapper: it forwards every argument unchanged to
                                                     :  * read_local_xlog_page_guts() with wait_for_wal = true and returns that
                                                     :  * function's result.
  2636 simon                     861                 :  */
                                 862                 : int
   699 tmunro                    863 GIC       19439 : read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                 864                 :                      int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
   366 jdavis                    865 ECB             : {
   366 jdavis                    866 GIC       19439 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                 867                 :                                      targetRecPtr, cur_page, true);
                                 868                 : }
                                869                 : 
                                 870                 : /*
                                 871                 :  * Same as read_local_xlog_page except that it doesn't wait for future WAL
                                 872                 :  * to be available.
                                                     :  *
                                                     :  * It forwards every argument unchanged to read_local_xlog_page_guts() with
                                                     :  * wait_for_wal = false.  In that mode the guts function casts
                                                     :  * state->private_data to ReadLocalXLogPageNoWaitPrivate and sets its
                                                     :  * end_of_wal flag when the request lies beyond the readable limit, so
                                                     :  * state->private_data has to point at such a struct when this entry point
                                                     :  * is used.
   366 jdavis                    873 ECB             :  */
                                 874                 : int
   366 jdavis                    875 GIC        3827 : read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                 876                 :                              int reqLen, XLogRecPtr targetRecPtr,
                                 877                 :                              char *cur_page)
                                 878                 : {
                                 879            3827 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                 880                 :                                      targetRecPtr, cur_page, false);
                                 881                 : }
                                882                 : 
                                 883                 : /*
   366 jdavis                    884 ECB             :  * Implementation of read_local_xlog_page and its no wait version.
                                                     :  *
                                                     :  * Reads the XLOG_BLCKSZ bytes starting at targetPagePtr into cur_page with
                                                     :  * WALRead(), and calls WALReadRaiseError() if WALRead() reports failure.
                                                     :  *
                                                     :  * Before reading, it loops until targetPagePtr + reqLen is within the
                                                     :  * readable limit.  The limit is the value returned by GetFlushRecPtr()
                                                     :  * when RecoveryInProgress() is false and by GetXLogReplayRecPtr()
                                                     :  * otherwise; if XLogReadDetermineTimeline() leaves state->currTLI different
                                                     :  * from the timeline those calls reported, the limit is
                                                     :  * state->currTLIValidUntil instead and the loop exits at once.
                                                     :  *
                                                     :  * While the request is beyond the limit, each iteration calls
                                                     :  * CHECK_FOR_INTERRUPTS() and pg_usleep(1000L).  If wait_for_wal is false
                                                     :  * the loop does not sleep: it sets end_of_wal in the
                                                     :  * ReadLocalXLogPageNoWaitPrivate found in state->private_data and exits.
                                                     :  *
                                                     :  * Returns the number of valid bytes in cur_page: XLOG_BLCKSZ when the whole
                                                     :  * page lies below the limit, otherwise limit - targetPagePtr.  Returns -1,
                                                     :  * without reading, when targetPagePtr + reqLen exceeds the limit.
                                                     :  * targetRecPtr is not used in this function.
                                 885                 :  */
                                 886                 : static int
   366 jdavis                    887 GIC       23266 : read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                 888                 :                           int reqLen, XLogRecPtr targetRecPtr,
                                 889                 :                           char *cur_page, bool wait_for_wal)
                                 890                 : {
                                 891                 :     XLogRecPtr  read_upto,
                                 892                 :                 loc;
  1231 alvherre                  893 ECB             :     TimeLineID  tli;
  2636 simon                     894                 :     int         count;
                                 895                 :     WALReadError errinfo;
   520 rhaas                     896                 :     TimeLineID  currTLI;
  2636 simon                     897                 : 
  2636 simon                     898 GIC       23266 :     loc = targetPagePtr + reqLen;
                                 899                 : 
                                 900                 :     /* Loop waiting for xlog to be available if necessary */
                                 901                 :     while (1)
                                 902                 :     {
                                 903                 :         /*
                                 904                 :          * Determine the limit of xlog we can currently read to, and what the
                                 905                 :          * most recent timeline is.
                                 906                 :          */
  2531 alvherre                  907           24258 :         if (!RecoveryInProgress())
   520 rhaas                     908           24084 :             read_upto = GetFlushRecPtr(&currTLI);
                                 909                 :         else
                                 910             174 :             read_upto = GetXLogReplayRecPtr(&currTLI);
                                 911           24258 :         tli = currTLI;
                                 912                 : 
                                 913                 :         /*
                                 914                 :          * Check which timeline to get the record from.
                                 915                 :          *
                                 916                 :          * We have to do it each time through the loop because if we're in
                                 917                 :          * recovery as a cascading standby, the current timeline might've
                                 918                 :          * become historical. We can't rely on RecoveryInProgress() because in
                                 919                 :          * a standby configuration like
                                 920                 :          *
                                 921                 :          * A => B => C
                                 922                 :          *
  2209 simon                     923 ECB             :          * if we're a logical decoding session on C, and B gets promoted, our
                                 924                 :          * timeline will change while we remain in recovery.
                                 925                 :          *
                                 926                 :          * We can't just keep reading from the old timeline as the last WAL
                                 927                 :          * archive in the timeline will get renamed to .partial by
  2153 bruce                     928                 :          * StartupXLOG().
  2209 simon                     929                 :          *
                                 930                 :          * If that happens after our caller determined the TLI but before we
                                 931                 :          * actually read the xlog page, we might still try to read from the
  2153 bruce                     932                 :          * old (now renamed) segment and fail. There's not much we can do
                                 933                 :          * about this, but it can only happen when we're a leaf of a cascading
                                 934                 :          * standby whose primary gets promoted while we're decoding, so a
                                 935                 :          * one-off ERROR isn't too bad.
                                 936                 :          */
   520 rhaas                     937 GIC       24258 :         XLogReadDetermineTimeline(state, targetPagePtr, reqLen, tli);
                                 938                 : 
                                 939           24258 :         if (state->currTLI == currTLI)
  2209 simon                     940 ECB             :         {
                                 941                 : 
  2209 simon                     942 CBC       23688 :             if (loc <= read_upto)
                                 943           22688 :                 break;
                                 944                 : 
                                 945                 :             /* If asked, let's not wait for future WAL. */
   366 jdavis                    946            1000 :             if (!wait_for_wal)
   344 jdavis                    947 ECB             :             {
                                 948                 :                 ReadLocalXLogPageNoWaitPrivate *private_data;
                                 949                 : 
                                 950                 :                 /*
                                 951                 :                  * Inform the caller of read_local_xlog_page_no_wait that the
                                 952                 :                  * end of WAL has been reached.
                                 953                 :                  */
   344 jdavis                    954 GIC           8 :                 private_data = (ReadLocalXLogPageNoWaitPrivate *)
                                 955                 :                     state->private_data;
                                 956               8 :                 private_data->end_of_wal = true;
   366                           957               8 :                 break;
                                 958                 :             }
   366 jdavis                    959 ECB             : 
  2209 simon                     960 GIC         992 :             CHECK_FOR_INTERRUPTS();
                                 961             992 :             pg_usleep(1000L);
                                 962                 :         }
                                 963                 :         else
                                 964                 :         {
                                 965                 :             /*
                                 966                 :              * We're on a historical timeline, so limit reading to the switch
                                 967                 :              * point where we moved to the next timeline.
                                 968                 :              *
  2209 simon                     969 ECB             :              * We don't need to GetFlushRecPtr or GetXLogReplayRecPtr. We know
                                 970                 :              * about the new timeline, so we must've received past the end of
                                 971                 :              * it.
                                 972                 :              */
  2209 simon                     973 GIC         570 :             read_upto = state->currTLIValidUntil;
                                 974                 : 
                                 975                 :             /*
  1231 alvherre                  976 ECB             :              * Setting tli to our wanted record's TLI is slightly wrong; the
                                 977                 :              * page might begin on an older timeline if it contains a timeline
                                 978                 :              * switch, since its xlog segment will have been copied from the
                                 979                 :              * prior timeline. This is pretty harmless though, as nothing
                                 980                 :              * cares so long as the timeline doesn't go backwards.  We should
                                 981                 :              * read the page header instead; FIXME someday.
  2209 simon                     982                 :              */
  1231 alvherre                  983 GIC         570 :             tli = state->currTLI;
  2209 simon                     984 ECB             : 
                                 985                 :             /* No need to wait on a historical timeline */
  2209 simon                     986 GIC         570 :             break;
  2209 simon                     987 ECB             :         }
                                 988                 :     }
                                 989                 : 
  2566 alvherre                  990 GIC       23266 :     if (targetPagePtr + XLOG_BLCKSZ <= read_upto)
                                 991                 :     {
  2566 alvherre                  992 ECB             :         /*
                                 993                 :          * at least one full block available; read only that block, have
                                 994                 :          * caller come back if they need more.
                                 995                 :          */
  2636 simon                     996 GIC       22570 :         count = XLOG_BLCKSZ;
                                 997                 :     }
  2566 alvherre                  998             696 :     else if (targetPagePtr + reqLen > read_upto)
                                 999                 :     {
  2566 alvherre                 1000 ECB             :         /* not enough data there */
   699 tmunro                   1001 GIC           8 :         return -1;
  2566 alvherre                 1002 EUB             :     }
                                1003                 :     else
                                1004                 :     {
  2566 alvherre                 1005 ECB             :         /* enough bytes available to satisfy the request */
  2566 alvherre                 1006 GIC         688 :         count = read_upto - targetPagePtr;
                                1007                 :     }
                                1008                 : 
                                1009                 :     /*
                                1010                 :      * Even though we just determined how much of the page can be validly read
                                1011                 :      * as 'count', read the whole page anyway. It's guaranteed to be
                                1012                 :      * zero-padded up to the page boundary if it's incomplete.
  2566 alvherre                 1013 EUB             :      */
   699 tmunro                   1014 GIC       23258 :     if (!WALRead(state, cur_page, targetPagePtr, XLOG_BLCKSZ, tli,
   699 tmunro                   1015 EUB             :                  &errinfo))
  1231 alvherre                 1016 UIC           0 :         WALReadRaiseError(&errinfo);
                                1017                 : 
  2566 alvherre                 1018 EUB             :     /* number of valid bytes in the buffer */
   699 tmunro                   1019 GIC       23258 :     return count;
  2636 simon                    1020 EUB             : }
                               1021                 : 
  1231 alvherre                 1022                 : /*
                                1023                 :  * Backend-specific convenience code to handle read errors encountered by
                                1024                 :  * WALRead().
                                                     :  *
                                                     :  * The segment file name used in the messages is rebuilt with XLogFileName()
                                                     :  * from errinfo->wre_seg (its ws_tli and ws_segno) and wal_segment_size.
                                                     :  *
                                                     :  * wre_read < 0 is reported as a file-access ERROR; errno is first set from
                                                     :  * errinfo->wre_errno, presumably so that %m in the message reflects the
                                                     :  * error saved at the time of the failed read.  wre_read == 0 is reported
                                                     :  * as an ERROR with ERRCODE_DATA_CORRUPTED, including the bytes read and
                                                     :  * requested.
                                                     :  *
                                                     :  * NOTE(review): a positive wre_read matches neither branch, so in that
                                                     :  * case the function returns without raising anything -- confirm that
                                                     :  * WALRead() never reports a failure with wre_read > 0.
                                1025                 :  */
                                1026                 : void
  1231 alvherre                 1027 UIC           0 : WALReadRaiseError(WALReadError *errinfo)
  1231 alvherre                 1028 EUB             : {
  1231 alvherre                 1029 UIC           0 :     WALOpenSegment *seg = &errinfo->wre_seg;
  1223 michael                  1030 EUB             :     char        fname[MAXFNAMELEN];
                                1031                 : 
  1223 michael                  1032 UIC           0 :     XLogFileName(fname, seg->ws_tli, seg->ws_segno, wal_segment_size);
                                1033                 : 
  1231 alvherre                 1034               0 :     if (errinfo->wre_read < 0)
                                1035                 :     {
  1231 alvherre                 1036 UBC           0 :         errno = errinfo->wre_errno;
  1231 alvherre                 1037 UIC           0 :         ereport(ERROR,
                                1038                 :                 (errcode_for_file_access(),
                                1039                 :                  errmsg("could not read from WAL segment %s, offset %d: %m",
                                1040                 :                         fname, errinfo->wre_off)));
                                1041                 :     }
                                1042               0 :     else if (errinfo->wre_read == 0)
                                1043                 :     {
                                1044               0 :         ereport(ERROR,
                                1045                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
                                1046                 :                  errmsg("could not read from WAL segment %s, offset %d: read %d of %d",
                                1047                 :                         fname, errinfo->wre_off, errinfo->wre_read,
                                1048                 :                         errinfo->wre_req)));
                                1049                 :     }
                                1050               0 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a