LCOV - differential code coverage report
Current view: top level - src/backend/access/transam - xlogutils.c (source / functions)
Current: Differential Code Coverage HEAD vs 15
Current Date: 2023-04-08 15:15:32
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Coverage (Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB):
  Lines:     75.7 %  243 184 10 8 29 12 7 115 24 38 35 122 5 16
  Functions: 90.9 %  22 20 1 1 14 6 1 18 1 2

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * xlogutils.c
       4                 :  *
       5                 :  * PostgreSQL write-ahead log manager utility routines
       6                 :  *
       7                 :  * This file contains support routines that are used by XLOG replay functions.
       8                 :  * None of this code is used during normal system operation.
       9                 :  *
      10                 :  *
      11                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      12                 :  * Portions Copyright (c) 1994, Regents of the University of California
      13                 :  *
      14                 :  * src/backend/access/transam/xlogutils.c
      15                 :  *
      16                 :  *-------------------------------------------------------------------------
      17                 :  */
      18                 : #include "postgres.h"
      19                 : 
      20                 : #include <unistd.h>
      21                 : 
      22                 : #include "access/timeline.h"
      23                 : #include "access/xlogrecovery.h"
      24                 : #include "access/xlog_internal.h"
      25                 : #include "access/xlogprefetcher.h"
      26                 : #include "access/xlogutils.h"
      27                 : #include "miscadmin.h"
      28                 : #include "pgstat.h"
      29                 : #include "storage/fd.h"
      30                 : #include "storage/smgr.h"
      31                 : #include "utils/guc.h"
      32                 : #include "utils/hsearch.h"
      33                 : #include "utils/rel.h"
      34                 : 
      35                 : 
      36                 : /* GUC variable */
      37                 : bool        ignore_invalid_pages = false;
      38                 : 
      39                 : /*
      40                 :  * Are we doing recovery from XLOG?
      41                 :  *
      42                 :  * This is only ever true in the startup process; it should be read as meaning
      43                 :  * "this process is replaying WAL records", rather than "the system is in
      44                 :  * recovery mode".  It should be examined primarily by functions that need
      45                 :  * to act differently when called from a WAL redo function (e.g., to skip WAL
      46                 :  * logging).  To check whether the system is in recovery regardless of which
      47                 :  * process you're running in, use RecoveryInProgress() but only after shared
      48                 :  * memory startup and lock initialization.
      49                 :  *
      50                 :  * This is updated from xlog.c and xlogrecovery.c, but lives here because
      51                 :  * it's mostly read by WAL redo functions.
      52                 :  */
      53                 : bool        InRecovery = false;
      54                 : 
      55                 : /* Are we in Hot Standby mode? Only valid in startup process, see xlogutils.h */
      56                 : HotStandbyState standbyState = STANDBY_DISABLED;
      57                 : 
      58                 : /*
      59                 :  * During XLOG replay, we may see XLOG records for incremental updates of
      60                 :  * pages that no longer exist, because their relation was later dropped or
      61                 :  * truncated.  (Note: this is only possible when full_page_writes = OFF,
      62                 :  * since when it's ON, the first reference we see to a page should always
      63                 :  * be a full-page rewrite not an incremental update.)  Rather than simply
      64                 :  * ignoring such records, we make a note of the referenced page, and then
      65                 :  * complain if we don't actually see a drop or truncate covering the page
      66                 :  * later in replay.
      67                 :  */
      68                 : typedef struct xl_invalid_page_key
      69                 : {
      70                 :     RelFileLocator locator;     /* the relation */
      71                 :     ForkNumber  forkno;         /* the fork number */
      72                 :     BlockNumber blkno;          /* the page */
      73                 : } xl_invalid_page_key;
      74                 : 
      75                 : typedef struct xl_invalid_page
      76                 : {
      77                 :     xl_invalid_page_key key;    /* hash key ... must be first */
      78                 :     bool        present;        /* page existed but contained zeroes */
      79                 : } xl_invalid_page;
      80                 : 
      81                 : static HTAB *invalid_page_tab = NULL;
      82                 : 
      83                 : static int  read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
      84                 :                                       int reqLen, XLogRecPtr targetRecPtr,
      85                 :                                       char *cur_page, bool wait_for_wal);
      86                 : 
      87                 : /* Report a reference to an invalid page */
      88                 : static void
      89 UNC           0 : report_invalid_page(int elevel, RelFileLocator locator, ForkNumber forkno,
      90                 :                     BlockNumber blkno, bool present)
      91                 : {
      92               0 :     char       *path = relpathperm(locator, forkno);
      93                 : 
      94 UBC           0 :     if (present)
      95               0 :         elog(elevel, "page %u of relation %s is uninitialized",
      96                 :              blkno, path);
      97                 :     else
      98               0 :         elog(elevel, "page %u of relation %s does not exist",
      99                 :              blkno, path);
     100               0 :     pfree(path);
     101               0 : }
     102                 : 
     103                 : /* Log a reference to an invalid page */
     104                 : static void
     105 GNC          45 : log_invalid_page(RelFileLocator locator, ForkNumber forkno, BlockNumber blkno,
     106                 :                  bool present)
     107                 : {
     108                 :     xl_invalid_page_key key;
     109                 :     xl_invalid_page *hentry;
     110                 :     bool        found;
     111                 : 
     112                 :     /*
     113                 :      * Once recovery has reached a consistent state, the invalid-page table
     114                 :      * should be empty and remain so. If a reference to an invalid page is
     115                 :      * found after consistency is reached, PANIC immediately. This might seem
     116                 :      * aggressive, but it's better than letting the invalid reference linger
     117                 :      * in the hash table until the end of recovery and PANIC there, which
     118                 :      * might come only much later if this is a standby server.
     119                 :      */
     120 CBC          45 :     if (reachedConsistency)
     121                 :     {
     122 UNC           0 :         report_invalid_page(WARNING, locator, forkno, blkno, present);
     123 UBC           0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
     124                 :              "WAL contains references to invalid pages");
     125                 :     }
     126                 : 
     127                 :     /*
     128                 :      * Log references to invalid pages at DEBUG1 level.  This allows some
     129                 :      * tracing of the cause (note the elog context mechanism will tell us
     130                 :      * something about the XLOG record that generated the reference).
     131                 :      */
     132 CBC          45 :     if (message_level_is_interesting(DEBUG1))
     133 UNC           0 :         report_invalid_page(DEBUG1, locator, forkno, blkno, present);
     134                 : 
     135 CBC          45 :     if (invalid_page_tab == NULL)
     136                 :     {
     137                 :         /* create hash table when first needed */
     138                 :         HASHCTL     ctl;
     139                 : 
     140               1 :         ctl.keysize = sizeof(xl_invalid_page_key);
     141               1 :         ctl.entrysize = sizeof(xl_invalid_page);
     142                 : 
     143               1 :         invalid_page_tab = hash_create("XLOG invalid-page table",
     144                 :                                        100,
     145                 :                                        &ctl,
     146                 :                                        HASH_ELEM | HASH_BLOBS);
     147                 :     }
     148                 : 
     149                 :     /* we currently assume xl_invalid_page_key contains no padding */
     150 GNC          45 :     key.locator = locator;
     151 CBC          45 :     key.forkno = forkno;
     152              45 :     key.blkno = blkno;
     153                 :     hentry = (xl_invalid_page *)
     154 GNC          45 :         hash_search(invalid_page_tab, &key, HASH_ENTER, &found);
     155                 : 
     156 CBC          45 :     if (!found)
     157                 :     {
     158                 :         /* hash_search already filled in the key */
     159              45 :         hentry->present = present;
     160                 :     }
     161                 :     else
     162                 :     {
     163                 :         /* repeat reference ... leave "present" as it was */
     164                 :     }
     165              45 : }
     166                 : 
     167                 : /* Forget any invalid pages >= minblkno, because they've been dropped */
     168                 : static void
     169 GNC       24406 : forget_invalid_pages(RelFileLocator locator, ForkNumber forkno,
     170                 :                      BlockNumber minblkno)
     171                 : {
     172                 :     HASH_SEQ_STATUS status;
     173                 :     xl_invalid_page *hentry;
     174                 : 
     175 GIC       24406 :     if (invalid_page_tab == NULL)
     176 CBC       24390 :         return;                 /* nothing to do */
     177 ECB             : 
     178 GIC          16 :     hash_seq_init(&status, invalid_page_tab);
     179 ECB             : 
     180 GIC         601 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
     181 ECB             :     {
     182 GNC         585 :         if (RelFileLocatorEquals(hentry->key.locator, locator) &&
     183 CBC          45 :             hentry->key.forkno == forkno &&
     184              45 :             hentry->key.blkno >= minblkno)
     185 ECB             :         {
     186 GIC          45 :             if (message_level_is_interesting(DEBUG2))
     187 ECB             :             {
     188 UNC           0 :                 char       *path = relpathperm(hentry->key.locator, forkno);
     189 EUB             : 
     190 UIC           0 :                 elog(DEBUG2, "page %u of relation %s has been dropped",
     191 EUB             :                      hentry->key.blkno, path);
     192 UIC           0 :                 pfree(path);
     193 EUB             :             }
     194                 : 
     195 GIC          45 :             if (hash_search(invalid_page_tab,
     196 GNC          45 :                             &hentry->key,
     197 ECB             :                             HASH_REMOVE, NULL) == NULL)
     198 UIC           0 :                 elog(ERROR, "hash table corrupted");
     199 EUB             :         }
     200                 :     }
     201                 : }
     202                 : 
     203                 : /* Forget any invalid pages in a whole database */
     204                 : static void
     205 GIC           9 : forget_invalid_pages_db(Oid dbid)
     206 ECB             : {
     207                 :     HASH_SEQ_STATUS status;
     208                 :     xl_invalid_page *hentry;
     209                 : 
     210 GIC           9 :     if (invalid_page_tab == NULL)
     211 CBC           9 :         return;                 /* nothing to do */
     212 ECB             : 
     213 UIC           0 :     hash_seq_init(&status, invalid_page_tab);
     214 EUB             : 
     215 UIC           0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
     216 EUB             :     {
     217 UNC           0 :         if (hentry->key.locator.dbOid == dbid)
     218 EUB             :         {
     219 UIC           0 :             if (message_level_is_interesting(DEBUG2))
     220 EUB             :             {
     221 UNC           0 :                 char       *path = relpathperm(hentry->key.locator, hentry->key.forkno);
     222 EUB             : 
     223 UIC           0 :                 elog(DEBUG2, "page %u of relation %s has been dropped",
     224 EUB             :                      hentry->key.blkno, path);
     225 UIC           0 :                 pfree(path);
     226 EUB             :             }
     227                 : 
     228 UIC           0 :             if (hash_search(invalid_page_tab,
     229 UNC           0 :                             &hentry->key,
     230 EUB             :                             HASH_REMOVE, NULL) == NULL)
     231 UIC           0 :                 elog(ERROR, "hash table corrupted");
     232 EUB             :         }
     233                 :     }
     234                 : }
     235                 : 
     236                 : /* Are there any unresolved references to invalid pages? */
     237                 : bool
     238 GIC         166 : XLogHaveInvalidPages(void)
     239 ECB             : {
     240 GIC         166 :     if (invalid_page_tab != NULL &&
     241 LBC           0 :         hash_get_num_entries(invalid_page_tab) > 0)
     242 UBC           0 :         return true;
     243 GBC         166 :     return false;
     244 ECB             : }
     245                 : 
     246                 : /* Complain about any remaining invalid-page entries */
     247                 : void
     248 GIC          75 : XLogCheckInvalidPages(void)
     249 ECB             : {
     250                 :     HASH_SEQ_STATUS status;
     251                 :     xl_invalid_page *hentry;
     252 GIC          75 :     bool        foundone = false;
     253 ECB             : 
     254 GIC          75 :     if (invalid_page_tab == NULL)
     255 CBC          75 :         return;                 /* nothing to do */
     256 ECB             : 
     257 UIC           0 :     hash_seq_init(&status, invalid_page_tab);
     258 EUB             : 
     259                 :     /*
     260                 :      * Our strategy is to emit WARNING messages for all remaining entries and
     261                 :      * only PANIC after we've dumped all the available info.
     262                 :      */
     263 UIC           0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
     264 EUB             :     {
     265 UNC           0 :         report_invalid_page(WARNING, hentry->key.locator, hentry->key.forkno,
     266 UBC           0 :                             hentry->key.blkno, hentry->present);
     267               0 :         foundone = true;
     268 EUB             :     }
     269                 : 
     270 UIC           0 :     if (foundone)
     271 UBC           0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
     272 EUB             :              "WAL contains references to invalid pages");
     273                 : 
     274 UIC           0 :     hash_destroy(invalid_page_tab);
     275 UBC           0 :     invalid_page_tab = NULL;
     276 EUB             : }
     277                 : 
     278                 : 
     279                 : /*
     280                 :  * XLogReadBufferForRedo
     281                 :  *      Read a page during XLOG replay
     282                 :  *
     283                 :  * Reads a block referenced by a WAL record into shared buffer cache, and
     284                 :  * determines what needs to be done to redo the changes to it.  If the WAL
     285                 :  * record includes a full-page image of the page, it is restored.
     286                 :  *
     287                 :  * 'record.EndRecPtr' is compared to the page's LSN to determine if the record
     288                 :  * has already been replayed.  'block_id' is the ID number the block was
     289                 :  * registered with, when the WAL record was created.
     290                 :  *
     291                 :  * Returns one of the following:
     292                 :  *
     293                 :  *  BLK_NEEDS_REDO  - changes from the WAL record need to be applied
     294                 :  *  BLK_DONE        - block doesn't need replaying
     295                 :  *  BLK_RESTORED    - block was restored from a full-page image included in
     296                 :  *                    the record
     297                 :  *  BLK_NOTFOUND    - block was not found (because it was truncated away by
     298                 :  *                    an operation later in the WAL stream)
     299                 :  *
     300                 :  * On return, the buffer is locked in exclusive-mode, and returned in *buf.
     301                 :  * Note that the buffer is locked and returned even if it doesn't need
     302                 :  * replaying.  (Getting the buffer lock is not really necessary during
     303                 :  * single-process crash recovery, but some subroutines such as MarkBufferDirty
     304                 :  * will complain if we don't have the lock.  In hot standby mode it's
     305                 :  * definitely necessary.)
     306                 :  *
     307                 :  * Note: when a backup block is available in XLOG with the BKPIMAGE_APPLY flag
     308                 :  * set, we restore it, even if the page in the database appears newer.  This
     309                 :  * is to protect ourselves against database pages that were partially or
     310                 :  * incorrectly written during a crash.  We assume that the XLOG data must be
     311                 :  * good because it has passed a CRC check, while the database page might not
     312                 :  * be.  This will force us to replay all subsequent modifications of the page
     313                 :  * that appear in XLOG, rather than possibly ignoring them as already
     314                 :  * applied, but that's not a huge drawback.
     315                 :  */
     316                 : XLogRedoAction
     317 GIC     2591739 : XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
     318 ECB             :                       Buffer *buf)
     319                 : {
     320 GIC     2591739 :     return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
     321 ECB             :                                          false, buf);
     322                 : }
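
A hedged aside (not part of xlogutils.c): the typical caller pattern for XLogReadBufferForRedo in an rmgr redo routine looks roughly like the sketch below. The record layout and the helper apply_my_change() are hypothetical stand-ins, and the usual bufmgr.h/xlogutils.h declarations are assumed.

        static void
        my_rmgr_redo(XLogReaderState *record)
        {
            XLogRecPtr  lsn = record->EndRecPtr;
            Buffer      buffer;

            if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
            {
                Page        page = BufferGetPage(buffer);

                /* apply_my_change() stands in for the rmgr-specific page edit */
                apply_my_change(page, XLogRecGetData(record));

                PageSetLSN(page, lsn);
                MarkBufferDirty(buffer);
            }

            /* the buffer comes back pinned and locked even for BLK_DONE/BLK_RESTORED */
            if (BufferIsValid(buffer))
                UnlockReleaseBuffer(buffer);
        }
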
     323                 : 
     324                 : /*
     325                 :  * Pin and lock a buffer referenced by a WAL record, for the purpose of
     326                 :  * re-initializing it.
     327                 :  */
     328                 : Buffer
     329 GIC       50028 : XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
     330 ECB             : {
     331                 :     Buffer      buf;
     332                 : 
     333 GIC       50028 :     XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
     334 ECB             :                                   &buf);
     335 GIC       50028 :     return buf;
     336 ECB             : }
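
Continuing the sketch above, this is roughly how a block registered with BKPBLOCK_WILL_INIT would be handled, with the redo routine rebuilding the page from scratch; passing 0 to PageInit as the special-space size is an assumption for a plain page layout.

        buffer = XLogInitBufferForRedo(record, 0);
        page = (Page) BufferGetPage(buffer);
        PageInit(page, BufferGetPageSize(buffer), 0);
        /* ... fill the page from the payload carried in the WAL record ... */
        PageSetLSN(page, record->EndRecPtr);
        MarkBufferDirty(buffer);
        UnlockReleaseBuffer(buffer);
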
     337                 : 
     338                 : /*
     339                 :  * XLogReadBufferForRedoExtended
     340                 :  *      Like XLogReadBufferForRedo, but with extra options.
     341                 :  *
     342                 :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
     343                 :  * with all-zeroes pages up to the referenced block number.  In
     344                 :  * RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK modes, the return value
     345                 :  * is always BLK_NEEDS_REDO.
     346                 :  *
     347                 :  * (The RBM_ZERO_AND_CLEANUP_LOCK mode is redundant with the get_cleanup_lock
     348                 :  * parameter. Do not use an inconsistent combination!)
     349                 :  *
     350                 :  * If 'get_cleanup_lock' is true, a "cleanup lock" is acquired on the buffer
     351                 :  * using LockBufferForCleanup(), instead of a regular exclusive lock.
     352                 :  */
     353                 : XLogRedoAction
     354 GIC     2654768 : XLogReadBufferForRedoExtended(XLogReaderState *record,
     355 ECB             :                               uint8 block_id,
     356                 :                               ReadBufferMode mode, bool get_cleanup_lock,
     357                 :                               Buffer *buf)
     358                 : {
     359 GIC     2654768 :     XLogRecPtr  lsn = record->EndRecPtr;
     360                 :     RelFileLocator rlocator;
     361                 :     ForkNumber  forknum;
     362                 :     BlockNumber blkno;
     363                 :     Buffer      prefetch_buffer;
     364                 :     Page        page;
     365                 :     bool        zeromode;
     366                 :     bool        willinit;
     367                 : 
     368 GNC     2654768 :     if (!XLogRecGetBlockTagExtended(record, block_id, &rlocator, &forknum, &blkno,
     369 ECB             :                                     &prefetch_buffer))
     370                 :     {
     371                 :         /* Caller specified a bogus block_id */
     372 UIC           0 :         elog(PANIC, "failed to locate backup block with ID %d in WAL record",
     373 EUB             :              block_id);
     374                 :     }
     375                 : 
     376                 :     /*
     377                 :      * Make sure that if the block is marked with WILL_INIT, the caller is
     378                 :      * going to initialize it. And vice versa.
     379                 :      */
     380 GIC     2654768 :     zeromode = (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
     381 CBC     2654768 :     willinit = (XLogRecGetBlock(record, block_id)->flags & BKPBLOCK_WILL_INIT) != 0;
     382         2654768 :     if (willinit && !zeromode)
     383 LBC           0 :         elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
     384 GBC     2654768 :     if (!willinit && zeromode)
     385 LBC           0 :         elog(PANIC, "block to be initialized in redo routine must be marked with WILL_INIT flag in the WAL record");
     386 EUB             : 
     387                 :     /* If it has a full-page image and it should be restored, do it. */
     388 GIC     2654768 :     if (XLogRecBlockImageApply(record, block_id))
     389 ECB             :     {
     390 GIC       36507 :         Assert(XLogRecHasBlockImage(record, block_id));
     391 GNC       36507 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno,
     392 ECB             :                                       get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK,
     393                 :                                       prefetch_buffer);
     394 GIC       36507 :         page = BufferGetPage(*buf);
     395 CBC       36507 :         if (!RestoreBlockImage(record, block_id, page))
     396 LBC           0 :             ereport(ERROR,
     397 EUB             :                     (errcode(ERRCODE_INTERNAL_ERROR),
     398                 :                      errmsg_internal("%s", record->errormsg_buf)));
     399                 : 
     400                 :         /*
     401                 :          * The page may be uninitialized. If so, we can't set the LSN because
     402                 :          * that would corrupt the page.
     403                 :          */
     404 GIC       36507 :         if (!PageIsNew(page))
     405 ECB             :         {
     406 GIC       36492 :             PageSetLSN(page, lsn);
     407 ECB             :         }
     408                 : 
     409 GIC       36507 :         MarkBufferDirty(*buf);
     410 ECB             : 
     411                 :         /*
     412                 :          * At the end of crash recovery the init forks of unlogged relations
     413                 :          * are copied, without going through shared buffers. So we need to
     414                 :          * force the on-disk state of init forks to always be in sync with the
     415                 :          * state in shared buffers.
     416                 :          */
     417 GIC       36507 :         if (forknum == INIT_FORKNUM)
     418 CBC          22 :             FlushOneBuffer(*buf);
     419 ECB             : 
     420 GIC       36507 :         return BLK_RESTORED;
     421 ECB             :     }
     422                 :     else
     423                 :     {
     424 GNC     2618261 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno, mode, prefetch_buffer);
     425 CBC     2618261 :         if (BufferIsValid(*buf))
     426 ECB             :         {
     427 GIC     2618216 :             if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
     428 ECB             :             {
     429 GIC     2567967 :                 if (get_cleanup_lock)
     430 CBC        7003 :                     LockBufferForCleanup(*buf);
     431 ECB             :                 else
     432 GIC     2560964 :                     LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
     433 ECB             :             }
     434 GIC     2618216 :             if (lsn <= PageGetLSN(BufferGetPage(*buf)))
     435 LBC           0 :                 return BLK_DONE;
     436 EUB             :             else
     437 GIC     2618216 :                 return BLK_NEEDS_REDO;
     438 ECB             :         }
     439                 :         else
     440 GIC          45 :             return BLK_NOTFOUND;
     441 ECB             :     }
     442                 : }
     443                 : 
     444                 : /*
     445                 :  * XLogReadBufferExtended
     446                 :  *      Read a page during XLOG replay
     447                 :  *
     448                 :  * This is functionally comparable to ReadBufferExtended. There's some
      449                 :  * This is functionally comparable to ReadBufferExtended. There are some
     450                 :  *
     451                 :  * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we
     452                 :  * return InvalidBuffer. In this case the caller should silently skip the
     453                 :  * update on this page. (In this situation, we expect that the page was later
     454                 :  * dropped or truncated. If we don't see evidence of that later in the WAL
     455                 :  * sequence, we'll complain at the end of WAL replay.)
     456                 :  *
     457                 :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
     458                 :  * with all-zeroes pages up to the given block number.
     459                 :  *
     460                 :  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
     461                 :  * exist, and we don't check for all-zeroes.  Thus, no log entry is made
     462                 :  * to imply that the page should be dropped or truncated later.
     463                 :  *
     464                 :  * Optionally, recent_buffer can be used to provide a hint about the location
     465                 :  * of the page in the buffer pool; it does not have to be correct, but avoids
     466                 :  * a buffer mapping table probe if it is.
     467                 :  *
     468                 :  * NB: A redo function should normally not call this directly. To get a page
     469                 :  * to modify, use XLogReadBufferForRedoExtended instead. It is important that
     470                 :  * all pages modified by a WAL record are registered in the WAL records, or
     471                 :  * they will be invisible to tools that need to know which pages are modified.
     472                 :  */
     473                 : Buffer
     474 GNC     2926216 : XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum,
     475 ECB             :                        BlockNumber blkno, ReadBufferMode mode,
     476                 :                        Buffer recent_buffer)
     477                 : {
     478                 :     BlockNumber lastblock;
     479                 :     Buffer      buffer;
     480                 :     SMgrRelation smgr;
     481                 : 
     482 GIC     2926216 :     Assert(blkno != P_NEW);
     483 ECB             : 
     484                 :     /* Do we have a clue where the buffer might be already? */
     485 GIC     2926216 :     if (BufferIsValid(recent_buffer) &&
     486 CBC      423615 :         mode == RBM_NORMAL &&
     487 GNC      423615 :         ReadRecentBuffer(rlocator, forknum, blkno, recent_buffer))
     488 ECB             :     {
     489 GIC      422006 :         buffer = recent_buffer;
     490 CBC      422006 :         goto recent_buffer_fast_path;
     491 ECB             :     }
     492                 : 
     493                 :     /* Open the relation at smgr level */
     494 GNC     2504210 :     smgr = smgropen(rlocator, InvalidBackendId);
     495 ECB             : 
     496                 :     /*
     497                 :      * Create the target file if it doesn't already exist.  This lets us cope
     498                 :      * if the replay sequence contains writes to a relation that is later
     499                 :      * deleted.  (The original coding of this routine would instead suppress
     500                 :      * the writes, but that seems like it risks losing valuable data if the
     501                 :      * filesystem loses an inode during a crash.  Better to write the data
     502                 :      * until we are actually told to delete the file.)
     503                 :      */
     504 GIC     2504210 :     smgrcreate(smgr, forknum, true);
     505 ECB             : 
     506 GIC     2504210 :     lastblock = smgrnblocks(smgr, forknum);
     507 ECB             : 
     508 GIC     2504210 :     if (blkno < lastblock)
     509 ECB             :     {
     510                 :         /* page exists in file */
     511 GNC     2467792 :         buffer = ReadBufferWithoutRelcache(rlocator, forknum, blkno,
     512 ECB             :                                            mode, NULL, true);
     513                 :     }
     514                 :     else
     515                 :     {
     516                 :         /* hm, page doesn't exist in file */
     517 GIC       36418 :         if (mode == RBM_NORMAL)
     518 ECB             :         {
     519 GNC          45 :             log_invalid_page(rlocator, forknum, blkno, false);
     520 CBC          45 :             return InvalidBuffer;
     521 ECB             :         }
     522 GIC       36373 :         if (mode == RBM_NORMAL_NO_LOG)
     523 LBC           0 :             return InvalidBuffer;
     524 EUB             :         /* OK to extend the file */
     525                 :         /* we do this in recovery only - no rel-extension lock needed */
     526 GIC       36373 :         Assert(InRecovery);
     527 GNC       36373 :         buffer = ExtendBufferedRelTo(EB_SMGR(smgr, RELPERSISTENCE_PERMANENT),
     528                 :                                      forknum,
     529                 :                                      NULL,
     530                 :                                      EB_PERFORMING_RECOVERY |
     531                 :                                      EB_SKIP_EXTENSION_LOCK,
     532                 :                                      blkno + 1,
     533                 :                                      mode);
     534                 :     }
     535 EUB             : 
     536 GBC     2926171 : recent_buffer_fast_path:
     537         2926171 :     if (mode == RBM_NORMAL)
     538                 :     {
     539                 :         /* check that page has been initialized */
     540 GIC     2564524 :         Page        page = (Page) BufferGetPage(buffer);
     541 ECB             : 
     542                 :         /*
     543                 :          * We assume that PageIsNew is safe without a lock. During recovery,
     544                 :          * there should be no other backends that could modify the buffer at
     545                 :          * the same time.
     546                 :          */
     547 GIC     2564524 :         if (PageIsNew(page))
     548                 :         {
     549 UIC           0 :             ReleaseBuffer(buffer);
     550 UNC           0 :             log_invalid_page(rlocator, forknum, blkno, true);
     551 UIC           0 :             return InvalidBuffer;
     552                 :         }
     553                 :     }
     554                 : 
     555 GIC     2926171 :     return buffer;
     556                 : }
     557                 : 
     558                 : /*
     559                 :  * Struct actually returned by CreateFakeRelcacheEntry, though the declared
     560                 :  * return type is Relation.
     561                 :  */
     562                 : typedef struct
     563                 : {
     564                 :     RelationData reldata;       /* Note: this must be first */
     565                 :     FormData_pg_class pgc;
     566                 : } FakeRelCacheEntryData;
     567                 : 
     568                 : typedef FakeRelCacheEntryData *FakeRelCacheEntry;
     569                 : 
     570                 : /*
     571 ECB             :  * Create a fake relation cache entry for a physical relation
     572                 :  *
     573                 :  * It's often convenient to use the same functions in XLOG replay as in the
     574                 :  * main codepath, but those functions typically work with a relcache entry.
     575                 :  * We don't have a working relation cache during XLOG replay, but this
     576                 :  * function can be used to create a fake relcache entry instead. Only the
     577                 :  * fields related to physical storage, like rd_rel, are initialized, so the
     578                 :  * fake entry is only usable in low-level operations like ReadBuffer().
     579                 :  *
     580                 :  * This is also used for syncing WAL-skipped files.
     581                 :  *
     582                 :  * Caller must free the returned entry with FreeFakeRelcacheEntry().
     583                 :  */
     584                 : Relation
     585 GNC       36169 : CreateFakeRelcacheEntry(RelFileLocator rlocator)
     586                 : {
     587 ECB             :     FakeRelCacheEntry fakeentry;
     588                 :     Relation    rel;
     589                 : 
     590                 :     /* Allocate the Relation struct and all related space in one block. */
     591 GIC       36169 :     fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
     592           36169 :     rel = (Relation) fakeentry;
     593 ECB             : 
     594 GIC       36169 :     rel->rd_rel = &fakeentry->pgc;
     595 GNC       36169 :     rel->rd_locator = rlocator;
     596                 : 
     597                 :     /*
     598                 :      * We will never be working with temp rels during recovery or while
     599                 :      * syncing WAL-skipped files.
     600                 :      */
     601 GIC       36169 :     rel->rd_backend = InvalidBackendId;
     602 ECB             : 
     603                 :     /* It must be a permanent table here */
     604 GIC       36169 :     rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
     605 ECB             : 
     606                 :     /* We don't know the name of the relation; use relfilenumber instead */
     607 GNC       36169 :     sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber);
     608                 : 
     609                 :     /*
     610                 :      * We set up the lockRelId in case anything tries to lock the dummy
     611                 :      * relation.  Note that this is fairly bogus since relNumber may be
     612                 :      * different from the relation's OID.  It shouldn't really matter though.
     613                 :      * In recovery, we are running by ourselves and can't have any lock
     614 ECB             :      * conflicts.  While syncing, we already hold AccessExclusiveLock.
     615                 :      */
     616 GNC       36169 :     rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;
     617           36169 :     rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber;
     618 ECB             : 
     619 CBC       36169 :     rel->rd_smgr = NULL;
     620 ECB             : 
     621 GIC       36169 :     return rel;
     622                 : }
     623                 : 
     624                 : /*
     625                 :  * Free a fake relation cache entry.
     626                 :  */
     627                 : void
     628           36169 : FreeFakeRelcacheEntry(Relation fakerel)
     629 ECB             : {
     630                 :     /* make sure the fakerel is not referenced by the SmgrRelation anymore */
     631 CBC       36169 :     if (fakerel->rd_smgr != NULL)
     632           23827 :         smgrclearowner(&fakerel->rd_smgr, fakerel->rd_smgr);
     633 GIC       36169 :     pfree(fakerel);
     634           36169 : }
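
As a hedged usage sketch (assuming access/visibilitymap.h is included and that rlocator and blkno come from the WAL record being replayed), heap replay pins the visibility map through such a fake relcache entry along these lines:

        Relation    reln = CreateFakeRelcacheEntry(rlocator);
        Buffer      vmbuffer = InvalidBuffer;

        visibilitymap_pin(reln, blkno, &vmbuffer);
        /* ... update the visibility map page while holding the pin ... */
        if (BufferIsValid(vmbuffer))
            ReleaseBuffer(vmbuffer);
        FreeFakeRelcacheEntry(reln);
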
     635                 : 
     636                 : /*
     637                 :  * Drop a relation during XLOG replay
     638                 :  *
     639                 :  * This is called when the relation is about to be deleted; we need to remove
     640 ECB             :  * any open "invalid-page" records for the relation.
     641                 :  */
     642                 : void
     643 GNC       24364 : XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
     644                 : {
     645           24364 :     forget_invalid_pages(rlocator, forknum, 0);
     646 GIC       24364 : }
     647                 : 
     648 ECB             : /*
     649                 :  * Drop a whole database during XLOG replay
     650                 :  *
     651                 :  * As above, but for DROP DATABASE instead of dropping a single rel
     652                 :  */
     653                 : void
     654 GIC           9 : XLogDropDatabase(Oid dbid)
     655                 : {
     656                 :     /*
     657                 :      * This is unnecessarily heavy-handed, as it will close SMgrRelation
     658                 :      * objects for other databases as well. DROP DATABASE occurs seldom enough
     659 ECB             :      * that it's not worth introducing a variant of smgrclose for just this
     660                 :      * purpose. XXX: Or should we rather leave the smgr entries dangling?
     661                 :      */
     662 CBC           9 :     smgrcloseall();
     663 ECB             : 
     664 GIC           9 :     forget_invalid_pages_db(dbid);
     665               9 : }
     666                 : 
     667                 : /*
     668                 :  * Truncate a relation during XLOG replay
     669                 :  *
     670                 :  * We need to clean up any open "invalid-page" records for the dropped pages.
     671                 :  */
     672                 : void
     673 GNC          42 : XLogTruncateRelation(RelFileLocator rlocator, ForkNumber forkNum,
     674                 :                      BlockNumber nblocks)
     675                 : {
     676              42 :     forget_invalid_pages(rlocator, forkNum, nblocks);
     677 GIC          42 : }
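
A hedged sketch of the replay-side bookkeeping that pairs with the invalid-page table: after applying a truncation record, the replay code reports the new relation length here so that any logged references at or beyond it are forgotten. The xlrec field names below mirror the smgr truncate record and are assumptions; the physical smgrtruncate of the fork happens alongside this call.

        /* xlrec describes a relation-truncate WAL record (assumed layout) */
        XLogTruncateRelation(xlrec->rlocator, MAIN_FORKNUM, xlrec->blkno);
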
     678                 : 
     679                 : /*
     680                 :  * Determine which timeline to read an xlog page from and set the
     681                 :  * XLogReaderState's currTLI to that timeline ID.
     682                 :  *
     683                 :  * We care about timelines in xlogreader when we might be reading xlog
     684                 :  * generated prior to a promotion, either if we're currently a standby in
     685                 :  * recovery or if we're a promoted primary reading xlogs generated by the old
     686                 :  * primary before our promotion.
     687                 :  *
     688                 :  * wantPage must be set to the start address of the page to read and
     689                 :  * wantLength to the amount of the page that will be read, up to
     690                 :  * XLOG_BLCKSZ. If the amount to be read isn't known, pass XLOG_BLCKSZ.
     691                 :  *
     692                 :  * The currTLI argument should be the system-wide current timeline.
     693                 :  * Note that this may be different from state->currTLI, which is the timeline
     694                 :  * from which the caller is currently reading previous xlog records.
     695                 :  *
     696                 :  * We switch to an xlog segment from the new timeline eagerly when on a
     697                 :  * historical timeline, as soon as we reach the start of the xlog segment
     698                 :  * containing the timeline switch.  The server copied the segment to the new
     699                 :  * timeline so all the data up to the switch point is the same, but there's no
     700                 :  * guarantee the old segment will still exist. It may have been deleted or
     701                 :  * renamed with a .partial suffix so we can't necessarily keep reading from
     702                 :  * the old TLI even though tliSwitchPoint says it's OK.
     703                 :  *
     704                 :  * We can't just check the timeline when we read a page on a different segment
     705                 :  * to the last page. We could've received a timeline switch from a cascading
     706 ECB             :  * upstream, so the current segment ends abruptly (possibly getting renamed to
     707                 :  * .partial) and we have to switch to a new one.  Even in the middle of reading
     708                 :  * a page we could have to dump the cached page and switch to a new TLI.
     709                 :  *
     710                 :  * Because of this, callers MAY NOT assume that currTLI is the timeline that
     711                 :  * will be in a page's xlp_tli; the page may begin on an older timeline or we
     712                 :  * might be reading from historical timeline data on a segment that's been
     713                 :  * copied to a new timeline.
     714                 :  *
     715                 :  * The caller must also make sure it doesn't read past the current replay
     716                 :  * position (using GetXLogReplayRecPtr) if executing in recovery, so it
     717                 :  * doesn't fail to notice that the current timeline became historical.
     718                 :  */
     719                 : void
     720 GIC       34901 : XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage,
     721                 :                           uint32 wantLength, TimeLineID currTLI)
     722                 : {
     723           34901 :     const XLogRecPtr lastReadPage = (state->seg.ws_segno *
     724           34901 :                                      state->segcxt.ws_segsize + state->segoff);
     725 ECB             : 
     726 CBC       34901 :     Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0);
     727 GBC       34901 :     Assert(wantLength <= XLOG_BLCKSZ);
     728           34901 :     Assert(state->readLen == 0 || state->readLen <= XLOG_BLCKSZ);
     729 GIC       34901 :     Assert(currTLI != 0);
     730                 : 
     731                 :     /*
     732                 :      * If the desired page is currently read in and valid, we have nothing to
     733                 :      * do.
     734                 :      *
     735                 :      * The caller should've ensured that it didn't previously advance readOff
     736                 :      * past the valid limit of this timeline, so it doesn't matter if the
     737                 :      * current TLI has since become historical.
     738                 :      */
     739           34901 :     if (lastReadPage == wantPage &&
     740            1863 :         state->readLen != 0 &&
     741 LBC           0 :         lastReadPage + state->readLen >= wantPage + Min(wantLength, XLOG_BLCKSZ - 1))
     742 UIC           0 :         return;
     743 ECB             : 
     744                 :     /*
     745                 :      * If we're reading from the current timeline, it hasn't become historical
     746                 :      * and the page we're reading is after the last page read, we can again
     747                 :      * just carry on. (Seeking backwards requires a check to make sure the
     748                 :      * older page isn't on a prior timeline).
     749                 :      *
     750                 :      * currTLI might've become historical since the caller obtained the value,
     751                 :      * but the caller is required not to read past the flush limit it saw at
     752                 :      * the time it looked up the timeline. There's nothing we can do about it
     753                 :      * if StartupXLOG() renames it to .partial concurrently.
     754                 :      */
     755 CBC       34901 :     if (state->currTLI == currTLI && wantPage >= lastReadPage)
     756 ECB             :     {
     757 CBC       33138 :         Assert(state->currTLIValidUntil == InvalidXLogRecPtr);
     758 GIC       33138 :         return;
     759                 :     }
     760                 : 
     761                 :     /*
     762                 :      * If we're just reading pages from a previously validated historical
     763                 :      * timeline and the timeline we're reading from is valid until the end of
     764                 :      * the current segment we can just keep reading.
     765                 :      */
     766            1763 :     if (state->currTLIValidUntil != InvalidXLogRecPtr &&
     767             570 :         state->currTLI != currTLI &&
     768             570 :         state->currTLI != 0 &&
     769             570 :         ((wantPage + wantLength) / state->segcxt.ws_segsize) <
     770             570 :         (state->currTLIValidUntil / state->segcxt.ws_segsize))
     771             569 :         return;
     772                 : 
     773                 :     /*
     774                 :      * If we reach this point we're either looking up a page for random
     775                 :      * access, the current timeline just became historical, or we're reading
     776 ECB             :      * from a new segment containing a timeline switch. In all cases we need
     777                 :      * to determine the newest timeline on the segment.
     778                 :      *
     779                 :      * If it's the current timeline we can just keep reading from here unless
     780                 :      * we detect a timeline switch that makes the current timeline historical.
     781                 :      * If it's a historical timeline we can read all the segment on the newest
     782                 :      * timeline because it contains all the old timelines' data too. So only
     783                 :      * one switch check is required.
     784                 :      */
     785                 :     {
     786                 :         /*
     787                 :          * We need to re-read the timeline history in case it's been changed
     788                 :          * by a promotion or replay from a cascaded replica.
     789                 :          */
     790 GIC        1194 :         List       *timelineHistory = readTimeLineHistory(currTLI);
     791                 :         XLogRecPtr  endOfSegment;
     792 ECB             : 
     793 GIC        1194 :         endOfSegment = ((wantPage / state->segcxt.ws_segsize) + 1) *
     794            1194 :             state->segcxt.ws_segsize - 1;
     795 CBC        1194 :         Assert(wantPage / state->segcxt.ws_segsize ==
     796                 :                endOfSegment / state->segcxt.ws_segsize);
     797 ECB             : 
     798                 :         /*
     799                 :          * Find the timeline of the last LSN on the segment containing
     800                 :          * wantPage.
     801                 :          */
     802 GIC        1194 :         state->currTLI = tliOfPointInHistory(endOfSegment, timelineHistory);
     803            1194 :         state->currTLIValidUntil = tliSwitchPoint(state->currTLI, timelineHistory,
     804                 :                                                   &state->nextTLI);
     805 ECB             : 
     806 GIC        1194 :         Assert(state->currTLIValidUntil == InvalidXLogRecPtr ||
     807                 :                wantPage + wantLength < state->currTLIValidUntil);
     808 ECB             : 
     809 GIC        1194 :         list_free_deep(timelineHistory);
     810                 : 
     811 CBC        1194 :         elog(DEBUG3, "switched to timeline %u valid until %X/%X",
     812 ECB             :              state->currTLI,
     813                 :              LSN_FORMAT_ARGS(state->currTLIValidUntil));
     814                 :     }
     815                 : }
     816 EUB             : 
     817                 : /* XLogReaderRoutine->segment_open callback for local pg_wal files */
     818                 : void
     819 GIC         716 : wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
     820                 :                  TimeLineID *tli_p)
     821                 : {
     822 GBC         716 :     TimeLineID  tli = *tli_p;
     823                 :     char        path[MAXPGPATH];
     824                 : 
     825 GIC         716 :     XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
     826             716 :     state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
     827             716 :     if (state->seg.ws_file >= 0)
     828             716 :         return;
     829                 : 
     830 LBC           0 :     if (errno == ENOENT)
     831 UIC           0 :         ereport(ERROR,
     832 ECB             :                 (errcode_for_file_access(),
     833                 :                  errmsg("requested WAL segment %s has already been removed",
     834                 :                         path)));
     835                 :     else
     836 UIC           0 :         ereport(ERROR,
     837                 :                 (errcode_for_file_access(),
     838                 :                  errmsg("could not open file \"%s\": %m",
     839                 :                         path)));
     840                 : }
     841                 : 
     842                 : /* stock XLogReaderRoutine->segment_close callback */
     843                 : void
     844 GIC        1237 : wal_segment_close(XLogReaderState *state)
     845                 : {
     846            1237 :     close(state->seg.ws_file);
     847                 :     /* need to check errno? */
     848            1237 :     state->seg.ws_file = -1;
     849 CBC        1237 : }
     850                 : 
     851                 : /*
     852 ECB             :  * XLogReaderRoutine->page_read callback for reading local xlog files
     853                 :  *
     854                 :  * Public because it would likely be very helpful for someone writing another
     855                 :  * output method outside walsender, e.g. in a bgworker.
     856                 :  *
     857                 :  * TODO: The walsender has its own version of this, but it relies on the
     858                 :  * walsender's latch being set whenever WAL is flushed. No such infrastructure
     859                 :  * exists for normal backends, so we have to do a check/sleep/repeat style of
     860                 :  * loop for now.
     861                 :  */
     862                 : int
     863 GIC       19439 : read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
     864                 :                      int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
     865 ECB             : {
     866 GIC       19439 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
     867                 :                                      targetRecPtr, cur_page, true);
     868                 : }
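
For context, a minimal sketch (under stated assumptions, not taken from the backend) of wiring these callbacks into an XLogReader from ordinary backend code such as a bgworker: start_lsn is assumed to be a known valid record pointer, and wal_segment_size is the server's segment-size global.

        XLogReaderState *reader;
        XLogRecord *record;
        char       *errormsg;

        reader = XLogReaderAllocate(wal_segment_size, NULL,
                                    XL_ROUTINE(.page_read = &read_local_xlog_page,
                                               .segment_open = &wal_segment_open,
                                               .segment_close = &wal_segment_close),
                                    NULL);
        if (reader == NULL)
            ereport(ERROR,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of memory")));

        /* position the reader at a known-valid record and walk forward */
        XLogBeginRead(reader, start_lsn);
        while ((record = XLogReadRecord(reader, &errormsg)) != NULL)
        {
            /* inspect or decode the record; with the waiting callback this
             * loop ends only on a read failure */
        }
        if (errormsg)
            elog(ERROR, "could not read WAL: %s", errormsg);

        XLogReaderFree(reader);
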
     869                 : 
     870                 : /*
     871                 :  * Same as read_local_xlog_page except that it doesn't wait for future WAL
     872                 :  * to be available.
     873 ECB             :  */
     874                 : int
     875 GIC        3827 : read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr,
     876                 :                              int reqLen, XLogRecPtr targetRecPtr,
     877                 :                              char *cur_page)
     878                 : {
     879            3827 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
     880                 :                                      targetRecPtr, cur_page, false);
     881                 : }
     882                 : 
     883                 : /*
     884 ECB             :  * Implementation of read_local_xlog_page and its no wait version.
     885                 :  */
     886                 : static int
     887 GIC       23266 : read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
     888                 :                           int reqLen, XLogRecPtr targetRecPtr,
     889                 :                           char *cur_page, bool wait_for_wal)
     890                 : {
     891                 :     XLogRecPtr  read_upto,
     892                 :                 loc;
     893 ECB             :     TimeLineID  tli;
     894                 :     int         count;
     895                 :     WALReadError errinfo;
     896                 :     TimeLineID  currTLI;
     897                 : 
     898 GIC       23266 :     loc = targetPagePtr + reqLen;
     899                 : 
     900                 :     /* Loop waiting for xlog to be available if necessary */
     901                 :     while (1)
     902                 :     {
     903                 :         /*
     904                 :          * Determine the limit of xlog we can currently read to, and what the
     905                 :          * most recent timeline is.
     906                 :          */
     907           24258 :         if (!RecoveryInProgress())
     908           24084 :             read_upto = GetFlushRecPtr(&currTLI);
     909                 :         else
     910             174 :             read_upto = GetXLogReplayRecPtr(&currTLI);
     911           24258 :         tli = currTLI;
     912                 : 
     913                 :         /*
     914                 :          * Check which timeline to get the record from.
     915                 :          *
     916                 :          * We have to do it each time through the loop because if we're in
     917                 :          * recovery as a cascading standby, the current timeline might've
     918                 :          * become historical. We can't rely on RecoveryInProgress() because in
     919                 :          * a standby configuration like
     920                 :          *
     921                 :          * A => B => C
     922                 :          *
     923 ECB             :          * if we're a logical decoding session on C, and B gets promoted, our
     924                 :          * timeline will change while we remain in recovery.
     925                 :          *
     926                 :          * We can't just keep reading from the old timeline as the last WAL
     927                 :          * archive in the timeline will get renamed to .partial by
     928                 :          * StartupXLOG().
     929                 :          *
     930                 :          * If that happens after our caller determined the TLI but before we
     931                 :          * actually read the xlog page, we might still try to read from the
     932                 :          * old (now renamed) segment and fail. There's not much we can do
     933                 :          * about this, but it can only happen when we're a leaf of a cascading
     934                 :          * standby whose primary gets promoted while we're decoding, so a
     935                 :          * one-off ERROR isn't too bad.
     936                 :          */
     937 GIC       24258 :         XLogReadDetermineTimeline(state, targetPagePtr, reqLen, tli);
     938                 : 
     939           24258 :         if (state->currTLI == currTLI)
     940 ECB             :         {
     941                 : 
     942 CBC       23688 :             if (loc <= read_upto)
     943           22688 :                 break;
     944                 : 
     945                 :             /* If asked, let's not wait for future WAL. */
     946            1000 :             if (!wait_for_wal)
     947 ECB             :             {
     948                 :                 ReadLocalXLogPageNoWaitPrivate *private_data;
     949                 : 
     950                 :                 /*
     951                 :                  * Inform the caller of read_local_xlog_page_no_wait that the
     952                 :                  * end of WAL has been reached.
     953                 :                  */
     954 GIC           8 :                 private_data = (ReadLocalXLogPageNoWaitPrivate *)
     955                 :                     state->private_data;
     956               8 :                 private_data->end_of_wal = true;
     957               8 :                 break;
     958                 :             }
     959 ECB             : 
     960 GIC         992 :             CHECK_FOR_INTERRUPTS();
     961             992 :             pg_usleep(1000L);
     962                 :         }
     963                 :         else
     964                 :         {
     965                 :             /*
     966                 :              * We're on a historical timeline, so limit reading to the switch
     967                 :              * point where we moved to the next timeline.
     968                 :              *
     969 ECB             :              * We don't need to GetFlushRecPtr or GetXLogReplayRecPtr. We know
     970                 :              * about the new timeline, so we must've received past the end of
     971                 :              * it.
     972                 :              */
     973 GIC         570 :             read_upto = state->currTLIValidUntil;
     974                 : 
     975                 :             /*
     976 ECB             :              * Setting tli to our wanted record's TLI is slightly wrong; the
     977                 :              * page might begin on an older timeline if it contains a timeline
     978                 :              * switch, since its xlog segment will have been copied from the
     979                 :              * prior timeline. This is pretty harmless though, as nothing
     980                 :              * cares so long as the timeline doesn't go backwards.  We should
     981                 :              * read the page header instead; FIXME someday.
     982                 :              */
     983 GIC         570 :             tli = state->currTLI;
     984 ECB             : 
     985                 :             /* No need to wait on a historical timeline */
     986 GIC         570 :             break;
     987 ECB             :         }
     988                 :     }
     989                 : 
     990 GIC       23266 :     if (targetPagePtr + XLOG_BLCKSZ <= read_upto)
     991                 :     {
     992 ECB             :         /*
     993                 :          * a full block is available; read only that block, and have the
     994                 :          * caller come back if they need more.
     995                 :          */
     996 GIC       22570 :         count = XLOG_BLCKSZ;
     997                 :     }
     998             696 :     else if (targetPagePtr + reqLen > read_upto)
     999                 :     {
    1000 ECB             :         /* not enough data there */
    1001 GIC           8 :         return -1;
    1002 EUB             :     }
    1003                 :     else
    1004                 :     {
    1005 ECB             :         /* enough bytes available to satisfy the request */
    1006 GIC         688 :         count = read_upto - targetPagePtr;
    1007                 :     }
    1008                 : 
    1009                 :     /*
    1010                 :      * Even though we just determined how much of the page can be validly read
    1011                 :      * as 'count', read the whole page anyway. It's guaranteed to be
    1012                 :      * zero-padded up to the page boundary if it's incomplete.
    1013 EUB             :      */
    1014 GIC       23258 :     if (!WALRead(state, cur_page, targetPagePtr, XLOG_BLCKSZ, tli,
    1015 EUB             :                  &errinfo))
    1016 UIC           0 :         WALReadRaiseError(&errinfo);
    1017                 : 
    1018 EUB             :     /* number of valid bytes in the buffer */
    1019 GIC       23258 :     return count;
    1020 EUB             : }
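The return-value handling above is simply the general XLogReaderRoutine->page_read contract, so it also governs any replacement callback: fill cur_page with a whole XLOG_BLCKSZ page (the segment is zero-padded, so reading the full page is safe), return the number of bytes known to be valid, or return -1 when the requested data is not available. Below is a hedged sketch of a callback restricted to an already-flushed, single-timeline range; FixedRangeReadPrivate and fixed_range_page_read are illustrative names, not part of this file.

    /* Illustrative private state for the sketch below; not part of xlogutils.c. */
    typedef struct FixedRangeReadPrivate
    {
        XLogRecPtr  end_lsn;    /* read no further than this (flushed) LSN */
        TimeLineID  read_tli;   /* the single timeline we read from */
    } FixedRangeReadPrivate;

    static int
    fixed_range_page_read(XLogReaderState *state, XLogRecPtr targetPagePtr,
                          int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
    {
        FixedRangeReadPrivate *priv = (FixedRangeReadPrivate *) state->private_data;
        int         count;
        WALReadError errinfo;

        /* Requested data lies beyond the range we were told to serve. */
        if (targetPagePtr + reqLen > priv->end_lsn)
            return -1;

        /* How much of this page falls within the range? */
        if (targetPagePtr + XLOG_BLCKSZ <= priv->end_lsn)
            count = XLOG_BLCKSZ;
        else
            count = priv->end_lsn - targetPagePtr;

        /* Read the whole page; it is zero-padded if incomplete. */
        if (!WALRead(state, cur_page, targetPagePtr, XLOG_BLCKSZ, priv->read_tli,
                     &errinfo))
            WALReadRaiseError(&errinfo);

        return count;
    }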
    1021                 : 
    1022                 : /*
    1023                 :  * Backend-specific convenience code to handle read errors encountered by
    1024                 :  * WALRead().
    1025                 :  */
    1026                 : void
    1027 UIC           0 : WALReadRaiseError(WALReadError *errinfo)
    1028 EUB             : {
    1029 UIC           0 :     WALOpenSegment *seg = &errinfo->wre_seg;
    1030 EUB             :     char        fname[MAXFNAMELEN];
    1031                 : 
    1032 UIC           0 :     XLogFileName(fname, seg->ws_tli, seg->ws_segno, wal_segment_size);
    1033                 : 
    1034               0 :     if (errinfo->wre_read < 0)
    1035                 :     {
    1036 UBC           0 :         errno = errinfo->wre_errno;
    1037 UIC           0 :         ereport(ERROR,
    1038                 :                 (errcode_for_file_access(),
    1039                 :                  errmsg("could not read from WAL segment %s, offset %d: %m",
    1040                 :                         fname, errinfo->wre_off)));
    1041                 :     }
    1042               0 :     else if (errinfo->wre_read == 0)
    1043                 :     {
    1044               0 :         ereport(ERROR,
    1045                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1046                 :                  errmsg("could not read from WAL segment %s, offset %d: read %d of %d",
    1047                 :                         fname, errinfo->wre_off, errinfo->wre_read,
    1048                 :                         errinfo->wre_req)));
    1049                 :     }
    1050               0 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a