LCOV - differential code coverage report
Current view: top level - src/backend/access/transam - xlogutils.c (source / functions) Coverage Total Hit UNC UBC GNC CBC DCB
Current: Differential Code Coverage 16@8cea358b128 vs 17@8cea358b128 Lines: 75.5 % 241 182 59 5 177 7
Current Date: 2024-04-14 14:21:10 Functions: 90.9 % 22 20 2 5 15
Baseline: 16@8cea358b128 Branches: 51.0 % 204 104 1 99 1 103
Baseline Date: 2024-04-14 14:21:09 Line coverage date bins:
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed [..60] days: 100.0 % 4 4 4
(60,120] days: 100.0 % 1 1 1
(180,240] days: 100.0 % 1 1 1
(240..) days: 74.9 % 235 176 59 176
Function coverage date bins:
(240..) days: 90.9 % 22 20 2 5 15
Branch coverage date bins:
[..60] days: 50.0 % 2 1 1 1
(240..) days: 51.0 % 202 103 99 103

 Age         Owner                    Branch data    TLA  Line data    Source code
                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * xlogutils.c
                                  4                 :                :  *
                                  5                 :                :  * PostgreSQL write-ahead log manager utility routines
                                  6                 :                :  *
                                  7                 :                :  * This file contains support routines that are used by XLOG replay functions.
                                  8                 :                :  * None of this code is used during normal system operation.
                                  9                 :                :  *
                                 10                 :                :  *
                                 11                 :                :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
                                 12                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                 13                 :                :  *
                                 14                 :                :  * src/backend/access/transam/xlogutils.c
                                 15                 :                :  *
                                 16                 :                :  *-------------------------------------------------------------------------
                                 17                 :                :  */
                                 18                 :                : #include "postgres.h"
                                 19                 :                : 
                                 20                 :                : #include <unistd.h>
                                 21                 :                : 
                                 22                 :                : #include "access/timeline.h"
                                 23                 :                : #include "access/xlogrecovery.h"
                                 24                 :                : #include "access/xlog_internal.h"
                                 25                 :                : #include "access/xlogutils.h"
                                 26                 :                : #include "miscadmin.h"
                                 27                 :                : #include "storage/fd.h"
                                 28                 :                : #include "storage/smgr.h"
                                 29                 :                : #include "utils/hsearch.h"
                                 30                 :                : #include "utils/rel.h"
                                 31                 :                : 
                                 32                 :                : 
                                 33                 :                : /* GUC variable */
                                 34                 :                : bool        ignore_invalid_pages = false;
                                 35                 :                : 
                                 36                 :                : /*
                                 37                 :                :  * Are we doing recovery from XLOG?
                                 38                 :                :  *
                                 39                 :                :  * This is only ever true in the startup process; it should be read as meaning
                                 40                 :                :  * "this process is replaying WAL records", rather than "the system is in
                                 41                 :                :  * recovery mode".  It should be examined primarily by functions that need
                                 42                 :                :  * to act differently when called from a WAL redo function (e.g., to skip WAL
                                 43                 :                :  * logging).  To check whether the system is in recovery regardless of which
                                 44                 :                :  * process you're running in, use RecoveryInProgress() but only after shared
                                 45                 :                :  * memory startup and lock initialization.
                                 46                 :                :  *
                                 47                 :                :  * This is updated from xlog.c and xlogrecovery.c, but lives here because
                                 48                 :                :  * it's mostly read by WAL redo functions.
                                 49                 :                :  */
                                 50                 :                : bool        InRecovery = false;
                                 51                 :                : 
                                 52                 :                : /* Are we in Hot Standby mode? Only valid in startup process, see xlogutils.h */
                                 53                 :                : HotStandbyState standbyState = STANDBY_DISABLED;
                                 54                 :                : 
                                 55                 :                : /*
                                 56                 :                :  * During XLOG replay, we may see XLOG records for incremental updates of
                                 57                 :                :  * pages that no longer exist, because their relation was later dropped or
                                 58                 :                :  * truncated.  (Note: this is only possible when full_page_writes = OFF,
                                 59                 :                :  * since when it's ON, the first reference we see to a page should always
                                 60                 :                :  * be a full-page rewrite not an incremental update.)  Rather than simply
                                 61                 :                :  * ignoring such records, we make a note of the referenced page, and then
                                 62                 :                :  * complain if we don't actually see a drop or truncate covering the page
                                 63                 :                :  * later in replay.
                                 64                 :                :  */
                                 65                 :                : typedef struct xl_invalid_page_key
                                 66                 :                : {
                                 67                 :                :     RelFileLocator locator;     /* the relation */
                                 68                 :                :     ForkNumber  forkno;         /* the fork number */
                                 69                 :                :     BlockNumber blkno;          /* the page */
                                 70                 :                : } xl_invalid_page_key;
                                 71                 :                : 
                                 72                 :                : typedef struct xl_invalid_page
                                 73                 :                : {
                                 74                 :                :     xl_invalid_page_key key;    /* hash key ... must be first */
                                 75                 :                :     bool        present;        /* page existed but contained zeroes */
                                 76                 :                : } xl_invalid_page;
                                 77                 :                : 
                                 78                 :                : static HTAB *invalid_page_tab = NULL;
                                 79                 :                : 
                                 80                 :                : static int  read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                 81                 :                :                                       int reqLen, XLogRecPtr targetRecPtr,
                                 82                 :                :                                       char *cur_page, bool wait_for_wal);
                                 83                 :                : 
                                 84                 :                : /* Report a reference to an invalid page */
                                 85                 :                : static void
  648 rhaas@postgresql.org       86                 :UBC           0 : report_invalid_page(int elevel, RelFileLocator locator, ForkNumber forkno,
                                 87                 :                :                     BlockNumber blkno, bool present)
                                 88                 :                : {
                                 89                 :              0 :     char       *path = relpathperm(locator, forkno);
                                 90                 :                : 
 4517 heikki.linnakangas@i       91         [ #  # ]:              0 :     if (present)
                                 92         [ #  # ]:              0 :         elog(elevel, "page %u of relation %s is uninitialized",
                                 93                 :                :              blkno, path);
                                 94                 :                :     else
                                 95         [ #  # ]:              0 :         elog(elevel, "page %u of relation %s does not exist",
                                 96                 :                :              blkno, path);
                                 97                 :              0 :     pfree(path);
                                 98                 :              0 : }
                                 99                 :                : 
                                100                 :                : /* Log a reference to an invalid page */
                                101                 :                : static void
  648 rhaas@postgresql.org      102                 :CBC          45 : log_invalid_page(RelFileLocator locator, ForkNumber forkno, BlockNumber blkno,
                                103                 :                :                  bool present)
                                104                 :                : {
                                105                 :                :     xl_invalid_page_key key;
                                106                 :                :     xl_invalid_page *hentry;
                                107                 :                :     bool        found;
                                108                 :                : 
                                109                 :                :     /*
                                110                 :                :      * Once recovery has reached a consistent state, the invalid-page table
                                111                 :                :      * should be empty and remain so. If a reference to an invalid page is
                                112                 :                :      * found after consistency is reached, PANIC immediately. This might seem
                                113                 :                :      * aggressive, but it's better than letting the invalid reference linger
                                114                 :                :      * in the hash table until the end of recovery and PANIC there, which
                                115                 :                :      * might come only much later if this is a standby server.
                                116                 :                :      */
 4510 heikki.linnakangas@i      117         [ -  + ]:             45 :     if (reachedConsistency)
                                118                 :                :     {
  648 rhaas@postgresql.org      119                 :UBC           0 :         report_invalid_page(WARNING, locator, forkno, blkno, present);
 1544 fujii@postgresql.org      120   [ #  #  #  # ]:              0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
                                121                 :                :              "WAL contains references to invalid pages");
                                122                 :                :     }
                                123                 :                : 
                                124                 :                :     /*
                                125                 :                :      * Log references to invalid pages at DEBUG1 level.  This allows some
                                126                 :                :      * tracing of the cause (note the elog context mechanism will tell us
                                127                 :                :      * something about the XLOG record that generated the reference).
                                128                 :                :      */
 1238 tgl@sss.pgh.pa.us         129         [ -  + ]:CBC          45 :     if (message_level_is_interesting(DEBUG1))
  648 rhaas@postgresql.org      130                 :UBC           0 :         report_invalid_page(DEBUG1, locator, forkno, blkno, present);
                                131                 :                : 
 6575 tgl@sss.pgh.pa.us         132         [ +  + ]:CBC          45 :     if (invalid_page_tab == NULL)
                                133                 :                :     {
                                134                 :                :         /* create hash table when first needed */
                                135                 :                :         HASHCTL     ctl;
                                136                 :                : 
                                137                 :              1 :         ctl.keysize = sizeof(xl_invalid_page_key);
                                138                 :              1 :         ctl.entrysize = sizeof(xl_invalid_page);
                                139                 :                : 
                                140                 :              1 :         invalid_page_tab = hash_create("XLOG invalid-page table",
                                141                 :                :                                        100,
                                142                 :                :                                        &ctl,
                                143                 :                :                                        HASH_ELEM | HASH_BLOBS);
                                144                 :                :     }
                                145                 :                : 
                                146                 :                :     /* we currently assume xl_invalid_page_key contains no padding */
  648 rhaas@postgresql.org      147                 :             45 :     key.locator = locator;
 5725 heikki.linnakangas@i      148                 :             45 :     key.forkno = forkno;
 6575 tgl@sss.pgh.pa.us         149                 :             45 :     key.blkno = blkno;
                                150                 :                :     hentry = (xl_invalid_page *)
  433 peter@eisentraut.org      151                 :             45 :         hash_search(invalid_page_tab, &key, HASH_ENTER, &found);
                                152                 :                : 
 6575 tgl@sss.pgh.pa.us         153         [ +  - ]:             45 :     if (!found)
                                154                 :                :     {
                                155                 :                :         /* hash_search already filled in the key */
                                156                 :             45 :         hentry->present = present;
                                157                 :                :     }
                                158                 :                :     else
                                159                 :                :     {
                                160                 :                :         /* repeat reference ... leave "present" as it was */
                                161                 :                :     }
                                162                 :             45 : }
                                163                 :                : 
                                164                 :                : /* Forget any invalid pages >= minblkno, because they've been dropped */
                                165                 :                : static void
  648 rhaas@postgresql.org      166                 :          28081 : forget_invalid_pages(RelFileLocator locator, ForkNumber forkno,
                                167                 :                :                      BlockNumber minblkno)
                                168                 :                : {
                                169                 :                :     HASH_SEQ_STATUS status;
                                170                 :                :     xl_invalid_page *hentry;
                                171                 :                : 
 6575 tgl@sss.pgh.pa.us         172         [ +  + ]:          28081 :     if (invalid_page_tab == NULL)
                                173                 :          28065 :         return;                 /* nothing to do */
                                174                 :                : 
                                175                 :             16 :     hash_seq_init(&status, invalid_page_tab);
                                176                 :                : 
                                177         [ +  + ]:            601 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
                                178                 :                :     {
  648 rhaas@postgresql.org      179   [ +  +  +  -  :            585 :         if (RelFileLocatorEquals(hentry->key.locator, locator) &&
                                              +  - ]
 5725 heikki.linnakangas@i      180         [ +  - ]:             45 :             hentry->key.forkno == forkno &&
 6575 tgl@sss.pgh.pa.us         181         [ +  - ]:             45 :             hentry->key.blkno >= minblkno)
                                182                 :                :         {
 1238                           183         [ -  + ]:             45 :             if (message_level_is_interesting(DEBUG2))
                                184                 :                :             {
  648 rhaas@postgresql.org      185                 :UBC           0 :                 char       *path = relpathperm(hentry->key.locator, forkno);
                                186                 :                : 
 5633 heikki.linnakangas@i      187         [ #  # ]:              0 :                 elog(DEBUG2, "page %u of relation %s has been dropped",
                                188                 :                :                      hentry->key.blkno, path);
                                189                 :              0 :                 pfree(path);
                                190                 :                :             }
                                191                 :                : 
 6575 tgl@sss.pgh.pa.us         192         [ -  + ]:CBC          45 :             if (hash_search(invalid_page_tab,
  433 peter@eisentraut.org      193                 :             45 :                             &hentry->key,
                                194                 :                :                             HASH_REMOVE, NULL) == NULL)
 6575 tgl@sss.pgh.pa.us         195         [ #  # ]:UBC           0 :                 elog(ERROR, "hash table corrupted");
                                196                 :                :         }
                                197                 :                :     }
                                198                 :                : }
                                199                 :                : 
                                200                 :                : /* Forget any invalid pages in a whole database */
                                201                 :                : static void
 6575 tgl@sss.pgh.pa.us         202                 :CBC          29 : forget_invalid_pages_db(Oid dbid)
                                203                 :                : {
                                204                 :                :     HASH_SEQ_STATUS status;
                                205                 :                :     xl_invalid_page *hentry;
                                206                 :                : 
                                207         [ +  - ]:             29 :     if (invalid_page_tab == NULL)
                                208                 :             29 :         return;                 /* nothing to do */
                                209                 :                : 
 6575 tgl@sss.pgh.pa.us         210                 :UBC           0 :     hash_seq_init(&status, invalid_page_tab);
                                211                 :                : 
                                212         [ #  # ]:              0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
                                213                 :                :     {
  648 rhaas@postgresql.org      214         [ #  # ]:              0 :         if (hentry->key.locator.dbOid == dbid)
                                215                 :                :         {
 1238 tgl@sss.pgh.pa.us         216         [ #  # ]:              0 :             if (message_level_is_interesting(DEBUG2))
                                217                 :                :             {
  648 rhaas@postgresql.org      218                 :              0 :                 char       *path = relpathperm(hentry->key.locator, hentry->key.forkno);
                                219                 :                : 
 5633 heikki.linnakangas@i      220         [ #  # ]:              0 :                 elog(DEBUG2, "page %u of relation %s has been dropped",
                                221                 :                :                      hentry->key.blkno, path);
                                222                 :              0 :                 pfree(path);
                                223                 :                :             }
                                224                 :                : 
 6575 tgl@sss.pgh.pa.us         225         [ #  # ]:              0 :             if (hash_search(invalid_page_tab,
  433 peter@eisentraut.org      226                 :              0 :                             &hentry->key,
                                227                 :                :                             HASH_REMOVE, NULL) == NULL)
 6575 tgl@sss.pgh.pa.us         228         [ #  # ]:              0 :                 elog(ERROR, "hash table corrupted");
                                229                 :                :         }
                                230                 :                :     }
                                231                 :                : }
                                232                 :                : 
                                233                 :                : /* Are there any unresolved references to invalid pages? */
                                234                 :                : bool
 4517 heikki.linnakangas@i      235                 :CBC         326 : XLogHaveInvalidPages(void)
                                236                 :                : {
                                237   [ -  +  -  - ]:            326 :     if (invalid_page_tab != NULL &&
 4517 heikki.linnakangas@i      238                 :UBC           0 :         hash_get_num_entries(invalid_page_tab) > 0)
                                239                 :              0 :         return true;
 4517 heikki.linnakangas@i      240                 :CBC         326 :     return false;
                                241                 :                : }
                                242                 :                : 
                                243                 :                : /* Complain about any remaining invalid-page entries */
                                244                 :                : void
 6575 tgl@sss.pgh.pa.us         245                 :            145 : XLogCheckInvalidPages(void)
                                246                 :                : {
                                247                 :                :     HASH_SEQ_STATUS status;
                                248                 :                :     xl_invalid_page *hentry;
                                249                 :            145 :     bool        foundone = false;
                                250                 :                : 
                                251         [ +  - ]:            145 :     if (invalid_page_tab == NULL)
                                252                 :            145 :         return;                 /* nothing to do */
                                253                 :                : 
 6575 tgl@sss.pgh.pa.us         254                 :UBC           0 :     hash_seq_init(&status, invalid_page_tab);
                                255                 :                : 
                                256                 :                :     /*
                                257                 :                :      * Our strategy is to emit WARNING messages for all remaining entries and
                                258                 :                :      * only PANIC after we've dumped all the available info.
                                259                 :                :      */
                                260         [ #  # ]:              0 :     while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
                                261                 :                :     {
  648 rhaas@postgresql.org      262                 :              0 :         report_invalid_page(WARNING, hentry->key.locator, hentry->key.forkno,
 4517 heikki.linnakangas@i      263                 :              0 :                             hentry->key.blkno, hentry->present);
 6575 tgl@sss.pgh.pa.us         264                 :              0 :         foundone = true;
                                265                 :                :     }
                                266                 :                : 
                                267         [ #  # ]:              0 :     if (foundone)
 1544 fujii@postgresql.org      268   [ #  #  #  # ]:              0 :         elog(ignore_invalid_pages ? WARNING : PANIC,
                                269                 :                :              "WAL contains references to invalid pages");
                                270                 :                : 
 5785 heikki.linnakangas@i      271                 :              0 :     hash_destroy(invalid_page_tab);
                                272                 :              0 :     invalid_page_tab = NULL;
                                273                 :                : }
                                274                 :                : 
                                275                 :                : 
                                276                 :                : /*
                                277                 :                :  * XLogReadBufferForRedo
                                278                 :                :  *      Read a page during XLOG replay
                                279                 :                :  *
                                280                 :                :  * Reads a block referenced by a WAL record into shared buffer cache, and
                                281                 :                :  * determines what needs to be done to redo the changes to it.  If the WAL
                                282                 :                :  * record includes a full-page image of the page, it is restored.
                                283                 :                :  *
                                284                 :                :  * 'record.EndRecPtr' is compared to the page's LSN to determine if the record
                                285                 :                :  * has already been replayed.  'block_id' is the ID number the block was
                                286                 :                :  * registered with, when the WAL record was created.
                                287                 :                :  *
                                288                 :                :  * Returns one of the following:
                                289                 :                :  *
                                290                 :                :  *  BLK_NEEDS_REDO  - changes from the WAL record need to be applied
                                291                 :                :  *  BLK_DONE        - block doesn't need replaying
                                292                 :                :  *  BLK_RESTORED    - block was restored from a full-page image included in
                                293                 :                :  *                    the record
                                294                 :                :  *  BLK_NOTFOUND    - block was not found (because it was truncated away by
                                295                 :                :  *                    an operation later in the WAL stream)
                                296                 :                :  *
                                297                 :                :  * On return, the buffer is locked in exclusive-mode, and returned in *buf.
                                298                 :                :  * Note that the buffer is locked and returned even if it doesn't need
                                299                 :                :  * replaying.  (Getting the buffer lock is not really necessary during
                                300                 :                :  * single-process crash recovery, but some subroutines such as MarkBufferDirty
                                301                 :                :  * will complain if we don't have the lock.  In hot standby mode it's
                                302                 :                :  * definitely necessary.)
                                303                 :                :  *
                                304                 :                :  * Note: when a backup block is available in XLOG with the BKPIMAGE_APPLY flag
                                305                 :                :  * set, we restore it, even if the page in the database appears newer.  This
                                306                 :                :  * is to protect ourselves against database pages that were partially or
                                307                 :                :  * incorrectly written during a crash.  We assume that the XLOG data must be
                                308                 :                :  * good because it has passed a CRC check, while the database page might not
                                309                 :                :  * be.  This will force us to replay all subsequent modifications of the page
                                310                 :                :  * that appear in XLOG, rather than possibly ignoring them as already
                                311                 :                :  * applied, but that's not a huge drawback.
                                312                 :                :  */
                                313                 :                : XLogRedoAction
 3433 heikki.linnakangas@i      314                 :CBC     3062232 : XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
                                315                 :                :                       Buffer *buf)
                                316                 :                : {
                                317                 :        3062232 :     return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
                                318                 :                :                                          false, buf);
                                319                 :                : }
                                320                 :                : 
                                321                 :                : /*
                                322                 :                :  * Pin and lock a buffer referenced by a WAL record, for the purpose of
                                323                 :                :  * re-initializing it.
                                324                 :                :  */
                                325                 :                : Buffer
                                326                 :          52723 : XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
                                327                 :                : {
                                328                 :                :     Buffer      buf;
                                329                 :                : 
                                330                 :          52723 :     XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
                                331                 :                :                                   &buf);
                                332                 :          52723 :     return buf;
                                333                 :                : }
                                334                 :                : 
                                335                 :                : /*
                                336                 :                :  * XLogReadBufferForRedoExtended
                                337                 :                :  *      Like XLogReadBufferForRedo, but with extra options.
                                338                 :                :  *
                                339                 :                :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
                                340                 :                :  * with all-zeroes pages up to the referenced block number.  In
                                341                 :                :  * RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK modes, the return value
                                342                 :                :  * is always BLK_NEEDS_REDO.
                                343                 :                :  *
                                344                 :                :  * (The RBM_ZERO_AND_CLEANUP_LOCK mode is redundant with the get_cleanup_lock
                                345                 :                :  * parameter. Do not use an inconsistent combination!)
                                346                 :                :  *
                                347                 :                :  * If 'get_cleanup_lock' is true, a "cleanup lock" is acquired on the buffer
                                348                 :                :  * using LockBufferForCleanup(), instead of a regular exclusive lock.
                                349                 :                :  */
                                350                 :                : XLogRedoAction
                                351                 :        3137812 : XLogReadBufferForRedoExtended(XLogReaderState *record,
                                352                 :                :                               uint8 block_id,
                                353                 :                :                               ReadBufferMode mode, bool get_cleanup_lock,
                                354                 :                :                               Buffer *buf)
                                355                 :                : {
                                356                 :        3137812 :     XLogRecPtr  lsn = record->EndRecPtr;
                                357                 :                :     RelFileLocator rlocator;
                                358                 :                :     ForkNumber  forknum;
                                359                 :                :     BlockNumber blkno;
                                360                 :                :     Buffer      prefetch_buffer;
                                361                 :                :     Page        page;
                                362                 :                :     bool        zeromode;
                                363                 :                :     bool        willinit;
                                364                 :                : 
  648 rhaas@postgresql.org      365         [ -  + ]:        3137812 :     if (!XLogRecGetBlockTagExtended(record, block_id, &rlocator, &forknum, &blkno,
                                366                 :                :                                     &prefetch_buffer))
                                367                 :                :     {
                                368                 :                :         /* Caller specified a bogus block_id */
  734 tgl@sss.pgh.pa.us         369         [ #  # ]:UBC           0 :         elog(PANIC, "failed to locate backup block with ID %d in WAL record",
                                370                 :                :              block_id);
                                371                 :                :     }
                                372                 :                : 
                                373                 :                :     /*
                                374                 :                :      * Make sure that if the block is marked with WILL_INIT, the caller is
                                375                 :                :      * going to initialize it. And vice versa.
                                376                 :                :      */
 3191 heikki.linnakangas@i      377   [ +  +  +  + ]:CBC     3137812 :     zeromode = (mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK);
  758 tmunro@postgresql.or      378                 :        3137812 :     willinit = (XLogRecGetBlock(record, block_id)->flags & BKPBLOCK_WILL_INIT) != 0;
 3191 heikki.linnakangas@i      379   [ +  +  -  + ]:        3137812 :     if (willinit && !zeromode)
 3191 heikki.linnakangas@i      380         [ #  # ]:UBC           0 :         elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
 3191 heikki.linnakangas@i      381   [ +  +  -  + ]:CBC     3137812 :     if (!willinit && zeromode)
 3191 heikki.linnakangas@i      382         [ #  # ]:UBC           0 :         elog(PANIC, "block to be initialized in redo routine must be marked with WILL_INIT flag in the WAL record");
                                383                 :                : 
                                384                 :                :     /* If it has a full-page image and it should be restored, do it. */
 2622 rhaas@postgresql.org      385         [ +  + ]:CBC     3137812 :     if (XLogRecBlockImageApply(record, block_id))
                                386                 :                :     {
                                387         [ -  + ]:          56209 :         Assert(XLogRecHasBlockImage(record, block_id));
  648                           388         [ +  + ]:          56209 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno,
                                389                 :                :                                       get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK,
                                390                 :                :                                       prefetch_buffer);
 2916 kgrittn@postgresql.o      391                 :          56209 :         page = BufferGetPage(*buf);
 3433 heikki.linnakangas@i      392         [ -  + ]:          56209 :         if (!RestoreBlockImage(record, block_id, page))
  583 michael@paquier.xyz       393         [ #  # ]:UBC           0 :             ereport(ERROR,
                                394                 :                :                     (errcode(ERRCODE_INTERNAL_ERROR),
                                395                 :                :                      errmsg_internal("%s", record->errormsg_buf)));
                                396                 :                : 
                                397                 :                :         /*
                                398                 :                :          * The page may be uninitialized. If so, we can't set the LSN because
                                399                 :                :          * that would corrupt the page.
                                400                 :                :          */
 3433 heikki.linnakangas@i      401         [ +  + ]:CBC       56209 :         if (!PageIsNew(page))
                                402                 :                :         {
                                403                 :          56194 :             PageSetLSN(page, lsn);
                                404                 :                :         }
                                405                 :                : 
                                406                 :          56209 :         MarkBufferDirty(*buf);
                                407                 :                : 
                                408                 :                :         /*
                                409                 :                :          * At the end of crash recovery the init forks of unlogged relations
                                410                 :                :          * are copied, without going through shared buffers. So we need to
                                411                 :                :          * force the on-disk state of init forks to always be in sync with the
                                412                 :                :          * state in shared buffers.
                                413                 :                :          */
 3048 andres@anarazel.de        414         [ +  + ]:          56209 :         if (forknum == INIT_FORKNUM)
                                415                 :             26 :             FlushOneBuffer(*buf);
                                416                 :                : 
 3532 heikki.linnakangas@i      417                 :          56209 :         return BLK_RESTORED;
                                418                 :                :     }
                                419                 :                :     else
                                420                 :                :     {
  648 rhaas@postgresql.org      421                 :        3081603 :         *buf = XLogReadBufferExtended(rlocator, forknum, blkno, mode, prefetch_buffer);
 3532 heikki.linnakangas@i      422         [ +  + ]:        3081603 :         if (BufferIsValid(*buf))
                                423                 :                :         {
 3440                           424   [ +  +  +  + ]:        3081558 :             if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
                                425                 :                :             {
                                426         [ +  + ]:        3028611 :                 if (get_cleanup_lock)
                                427                 :           9945 :                     LockBufferForCleanup(*buf);
                                428                 :                :                 else
                                429                 :        3018666 :                     LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
                                430                 :                :             }
 2916 kgrittn@postgresql.o      431         [ -  + ]:        3081558 :             if (lsn <= PageGetLSN(BufferGetPage(*buf)))
 3532 heikki.linnakangas@i      432                 :UBC           0 :                 return BLK_DONE;
                                433                 :                :             else
 3532 heikki.linnakangas@i      434                 :CBC     3081558 :                 return BLK_NEEDS_REDO;
                                435                 :                :         }
                                436                 :                :         else
                                437                 :             45 :             return BLK_NOTFOUND;
                                438                 :                :     }
                                439                 :                : }
                                440                 :                : 
                                441                 :                : /*
                                442                 :                :  * XLogReadBufferExtended
                                443                 :                :  *      Read a page during XLOG replay
                                444                 :                :  *
                                445                 :                :  * This is functionally comparable to ReadBufferExtended. There's some
                                446                 :                :  * differences in the behavior wrt. the "mode" argument:
                                447                 :                :  *
                                448                 :                :  * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we
                                449                 :                :  * return InvalidBuffer. In this case the caller should silently skip the
                                450                 :                :  * update on this page. (In this situation, we expect that the page was later
                                451                 :                :  * dropped or truncated. If we don't see evidence of that later in the WAL
                                452                 :                :  * sequence, we'll complain at the end of WAL replay.)
                                453                 :                :  *
                                454                 :                :  * In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
                                455                 :                :  * with all-zeroes pages up to the given block number.
                                456                 :                :  *
                                457                 :                :  * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
                                458                 :                :  * exist, and we don't check for all-zeroes.  Thus, no log entry is made
                                459                 :                :  * to imply that the page should be dropped or truncated later.
                                460                 :                :  *
                                461                 :                :  * Optionally, recent_buffer can be used to provide a hint about the location
                                462                 :                :  * of the page in the buffer pool; it does not have to be correct, but avoids
                                463                 :                :  * a buffer mapping table probe if it is.
                                464                 :                :  *
                                465                 :                :  * NB: A redo function should normally not call this directly. To get a page
                                466                 :                :  * to modify, use XLogReadBufferForRedoExtended instead. It is important that
                                467                 :                :  * all pages modified by a WAL record are registered in the WAL records, or
                                468                 :                :  * they will be invisible to tools that need to know which pages are modified.
                                469                 :                :  */
                                470                 :                : Buffer
  648 rhaas@postgresql.org      471                 :        3500890 : XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum,
                                472                 :                :                        BlockNumber blkno, ReadBufferMode mode,
                                473                 :                :                        Buffer recent_buffer)
                                474                 :                : {
                                475                 :                :     BlockNumber lastblock;
                                476                 :                :     Buffer      buffer;
                                477                 :                :     SMgrRelation smgr;
                                478                 :                : 
 6591 tgl@sss.pgh.pa.us         479         [ -  + ]:        3500890 :     Assert(blkno != P_NEW);
                                480                 :                : 
                                481                 :                :     /* Do we have a clue where the buffer might be already? */
  738 tmunro@postgresql.or      482   [ +  +  +  - ]:        3500890 :     if (BufferIsValid(recent_buffer) &&
                                483         [ +  + ]:         518831 :         mode == RBM_NORMAL &&
  648 rhaas@postgresql.org      484                 :         518831 :         ReadRecentBuffer(rlocator, forknum, blkno, recent_buffer))
                                485                 :                :     {
  738 tmunro@postgresql.or      486                 :         516485 :         buffer = recent_buffer;
                                487                 :         516485 :         goto recent_buffer_fast_path;
                                488                 :                :     }
                                489                 :                : 
                                490                 :                :     /* Open the relation at smgr level */
   42 heikki.linnakangas@i      491                 :GNC     2984405 :     smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
                                492                 :                : 
                                493                 :                :     /*
                                494                 :                :      * Create the target file if it doesn't already exist.  This lets us cope
                                495                 :                :      * if the replay sequence contains writes to a relation that is later
                                496                 :                :      * deleted.  (The original coding of this routine would instead suppress
                                497                 :                :      * the writes, but that seems like it risks losing valuable data if the
                                498                 :                :      * filesystem loses an inode during a crash.  Better to write the data
                                499                 :                :      * until we are actually told to delete the file.)
                                500                 :                :      */
 5625 heikki.linnakangas@i      501                 :CBC     2984405 :     smgrcreate(smgr, forknum, true);
                                502                 :                : 
 5725                           503                 :        2984405 :     lastblock = smgrnblocks(smgr, forknum);
                                504                 :                : 
 6591 tgl@sss.pgh.pa.us         505         [ +  + ]:        2984405 :     if (blkno < lastblock)
                                506                 :                :     {
                                507                 :                :         /* page exists in file */
  648 rhaas@postgresql.org      508                 :        2936503 :         buffer = ReadBufferWithoutRelcache(rlocator, forknum, blkno,
                                509                 :                :                                            mode, NULL, true);
                                510                 :                :     }
                                511                 :                :     else
                                512                 :                :     {
                                513                 :                :         /* hm, page doesn't exist in file */
 5644 heikki.linnakangas@i      514         [ +  + ]:          47902 :         if (mode == RBM_NORMAL)
                                515                 :                :         {
  648 rhaas@postgresql.org      516                 :             45 :             log_invalid_page(rlocator, forknum, blkno, false);
 6575 tgl@sss.pgh.pa.us         517                 :             45 :             return InvalidBuffer;
                                518                 :                :         }
 3743                           519         [ -  + ]:          47857 :         if (mode == RBM_NORMAL_NO_LOG)
 3743 tgl@sss.pgh.pa.us         520                 :UBC           0 :             return InvalidBuffer;
                                521                 :                :         /* OK to extend the file */
                                522                 :                :         /* we do this in recovery only - no rel-extension lock needed */
 6591 tgl@sss.pgh.pa.us         523         [ -  + ]:CBC       47857 :         Assert(InRecovery);
  235 tmunro@postgresql.or      524                 :          47857 :         buffer = ExtendBufferedRelTo(BMR_SMGR(smgr, RELPERSISTENCE_PERMANENT),
                                525                 :                :                                      forknum,
                                526                 :                :                                      NULL,
                                527                 :                :                                      EB_PERFORMING_RECOVERY |
                                528                 :                :                                      EB_SKIP_EXTENSION_LOCK,
                                529                 :                :                                      blkno + 1,
                                530                 :                :                                      mode);
                                531                 :                :     }
                                532                 :                : 
  738                           533                 :        3500845 : recent_buffer_fast_path:
 5644 heikki.linnakangas@i      534         [ +  + ]:        3500845 :     if (mode == RBM_NORMAL)
                                535                 :                :     {
                                536                 :                :         /* check that page has been initialized */
 2916 kgrittn@postgresql.o      537                 :        3021286 :         Page        page = (Page) BufferGetPage(buffer);
                                538                 :                : 
                                539                 :                :         /*
                                540                 :                :          * We assume that PageIsNew is safe without a lock. During recovery,
                                541                 :                :          * there should be no other backends that could modify the buffer at
                                542                 :                :          * the same time.
                                543                 :                :          */
 5754 tgl@sss.pgh.pa.us         544         [ -  + ]:        3021286 :         if (PageIsNew(page))
                                545                 :                :         {
 5563 heikki.linnakangas@i      546                 :UBC           0 :             ReleaseBuffer(buffer);
  648 rhaas@postgresql.org      547                 :              0 :             log_invalid_page(rlocator, forknum, blkno, true);
 6575 tgl@sss.pgh.pa.us         548                 :              0 :             return InvalidBuffer;
                                549                 :                :         }
                                550                 :                :     }
                                551                 :                : 
 6668 neilc@samurai.com         552                 :CBC     3500845 :     return buffer;
                                553                 :                : }
                                554                 :                : 
                                555                 :                : /*
                                556                 :                :  * Struct actually returned by CreateFakeRelcacheEntry, though the declared
                                557                 :                :  * return type is Relation.
                                558                 :                :  */
                                559                 :                : typedef struct
                                560                 :                : {
                                561                 :                :     RelationData reldata;       /* Note: this must be first */
                                562                 :                :     FormData_pg_class pgc;
                                563                 :                : } FakeRelCacheEntryData;
                                564                 :                : 
                                565                 :                : typedef FakeRelCacheEntryData *FakeRelCacheEntry;
                                566                 :                : 
                                567                 :                : /*
                                568                 :                :  * Create a fake relation cache entry for a physical relation
                                569                 :                :  *
                                570                 :                :  * It's often convenient to use the same functions in XLOG replay as in the
                                571                 :                :  * main codepath, but those functions typically work with a relcache entry.
                                572                 :                :  * We don't have a working relation cache during XLOG replay, but this
                                573                 :                :  * function can be used to create a fake relcache entry instead. Only the
                                574                 :                :  * fields related to physical storage, like rd_rel, are initialized, so the
                                575                 :                :  * fake entry is only usable in low-level operations like ReadBuffer().
                                576                 :                :  *
                                577                 :                :  * This is also used for syncing WAL-skipped files.
                                578                 :                :  *
                                579                 :                :  * Caller must free the returned entry with FreeFakeRelcacheEntry().
                                580                 :                :  */
                                581                 :                : Relation
  648 rhaas@postgresql.org      582                 :          51167 : CreateFakeRelcacheEntry(RelFileLocator rlocator)
                                583                 :                : {
                                584                 :                :     FakeRelCacheEntry fakeentry;
                                585                 :                :     Relation    rel;
                                586                 :                : 
                                587                 :                :     /* Allocate the Relation struct and all related space in one block. */
 5785 heikki.linnakangas@i      588                 :          51167 :     fakeentry = palloc0(sizeof(FakeRelCacheEntryData));
                                589                 :          51167 :     rel = (Relation) fakeentry;
                                590                 :                : 
                                591                 :          51167 :     rel->rd_rel = &fakeentry->pgc;
  648 rhaas@postgresql.org      592                 :          51167 :     rel->rd_locator = rlocator;
                                593                 :                : 
                                594                 :                :     /*
                                595                 :                :      * We will never be working with temp rels during recovery or while
                                596                 :                :      * syncing WAL-skipped files.
                                597                 :                :      */
   42 heikki.linnakangas@i      598                 :GNC       51167 :     rel->rd_backend = INVALID_PROC_NUMBER;
                                599                 :                : 
                                600                 :                :     /* It must be a permanent table here */
 4230 rhaas@postgresql.org      601                 :CBC       51167 :     rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT;
                                602                 :                : 
                                603                 :                :     /* We don't know the name of the relation; use relfilenumber instead */
  564                           604                 :          51167 :     sprintf(RelationGetRelationName(rel), "%u", rlocator.relNumber);
                                605                 :                : 
                                606                 :                :     /*
                                607                 :                :      * We set up the lockRelId in case anything tries to lock the dummy
                                608                 :                :      * relation.  Note that this is fairly bogus since relNumber may be
                                609                 :                :      * different from the relation's OID.  It shouldn't really matter though.
                                610                 :                :      * In recovery, we are running by ourselves and can't have any lock
                                611                 :                :      * conflicts.  While syncing, we already hold AccessExclusiveLock.
                                612                 :                :      */
  648                           613                 :          51167 :     rel->rd_lockInfo.lockRelId.dbId = rlocator.dbOid;
  564                           614                 :          51167 :     rel->rd_lockInfo.lockRelId.relId = rlocator.relNumber;
                                615                 :                : 
                                616                 :                :     /*
                                617                 :                :      * Set up a non-pinned SMgrRelation reference, so that we don't need to
                                618                 :                :      * worry about unpinning it on error.
                                619                 :                :      */
   42 heikki.linnakangas@i      620                 :GNC       51167 :     rel->rd_smgr = smgropen(rlocator, INVALID_PROC_NUMBER);
                                621                 :                : 
 5785 heikki.linnakangas@i      622                 :CBC       51167 :     return rel;
                                623                 :                : }
                                624                 :                : 
                                625                 :                : /*
                                626                 :                :  * Free a fake relation cache entry.
                                627                 :                :  */
                                628                 :                : void
                                629                 :          51167 : FreeFakeRelcacheEntry(Relation fakerel)
                                630                 :                : {
                                631                 :          51167 :     pfree(fakerel);
 8577 vadim4o@yahoo.com         632                 :          51167 : }
                                633                 :                : 
                                634                 :                : /*
                                635                 :                :  * Drop a relation during XLOG replay
                                636                 :                :  *
                                637                 :                :  * This is called when the relation is about to be deleted; we need to remove
                                638                 :                :  * any open "invalid-page" records for the relation.
                                639                 :                :  */
                                640                 :                : void
  648 rhaas@postgresql.org      641                 :          28028 : XLogDropRelation(RelFileLocator rlocator, ForkNumber forknum)
                                642                 :                : {
                                643                 :          28028 :     forget_invalid_pages(rlocator, forknum, 0);
 7368 tgl@sss.pgh.pa.us         644                 :          28028 : }
                                645                 :                : 
                                646                 :                : /*
                                647                 :                :  * Drop a whole database during XLOG replay
                                648                 :                :  *
                                649                 :                :  * As above, but for DROP DATABASE instead of dropping a single rel
                                650                 :                :  */
                                651                 :                : void
 6591                           652                 :             29 : XLogDropDatabase(Oid dbid)
                                653                 :                : {
                                654                 :                :     /*
                                655                 :                :      * This is unnecessarily heavy-handed, as it will close SMgrRelation
                                656                 :                :      * objects for other databases as well. DROP DATABASE occurs seldom enough
                                657                 :                :      * that it's not worth introducing a variant of smgrdestroy for just this
                                658                 :                :      * purpose.
                                659                 :                :      */
   74 heikki.linnakangas@i      660                 :GNC          29 :     smgrdestroyall();
                                661                 :                : 
 6575 tgl@sss.pgh.pa.us         662                 :CBC          29 :     forget_invalid_pages_db(dbid);
                                663                 :             29 : }
                                664                 :                : 
                                665                 :                : /*
                                666                 :                :  * Truncate a relation during XLOG replay
                                667                 :                :  *
                                668                 :                :  * We need to clean up any open "invalid-page" records for the dropped pages.
                                669                 :                :  */
                                670                 :                : void
  648 rhaas@postgresql.org      671                 :             53 : XLogTruncateRelation(RelFileLocator rlocator, ForkNumber forkNum,
                                672                 :                :                      BlockNumber nblocks)
                                673                 :                : {
                                674                 :             53 :     forget_invalid_pages(rlocator, forkNum, nblocks);
 6591 tgl@sss.pgh.pa.us         675                 :             53 : }
                                676                 :                : 
                                677                 :                : /*
                                678                 :                :  * Determine which timeline to read an xlog page from and set the
                                679                 :                :  * XLogReaderState's currTLI to that timeline ID.
                                680                 :                :  *
                                681                 :                :  * We care about timelines in xlogreader when we might be reading xlog
                                682                 :                :  * generated prior to a promotion, either if we're currently a standby in
                                683                 :                :  * recovery or if we're a promoted primary reading xlogs generated by the old
                                684                 :                :  * primary before our promotion.
                                685                 :                :  *
                                686                 :                :  * wantPage must be set to the start address of the page to read and
                                687                 :                :  * wantLength to the amount of the page that will be read, up to
                                688                 :                :  * XLOG_BLCKSZ. If the amount to be read isn't known, pass XLOG_BLCKSZ.
                                689                 :                :  *
                                690                 :                :  * The currTLI argument should be the system-wide current timeline.
                                691                 :                :  * Note that this may be different from state->currTLI, which is the timeline
                                692                 :                :  * from which the caller is currently reading previous xlog records.
                                693                 :                :  *
                                694                 :                :  * We switch to an xlog segment from the new timeline eagerly when on a
                                695                 :                :  * historical timeline, as soon as we reach the start of the xlog segment
                                696                 :                :  * containing the timeline switch.  The server copied the segment to the new
                                697                 :                :  * timeline so all the data up to the switch point is the same, but there's no
                                698                 :                :  * guarantee the old segment will still exist. It may have been deleted or
                                699                 :                :  * renamed with a .partial suffix so we can't necessarily keep reading from
                                700                 :                :  * the old TLI even though tliSwitchPoint says it's OK.
                                701                 :                :  *
                                702                 :                :  * We can't just check the timeline when we read a page on a different segment
                                703                 :                :  * to the last page. We could've received a timeline switch from a cascading
                                704                 :                :  * upstream, so the current segment ends abruptly (possibly getting renamed to
                                705                 :                :  * .partial) and we have to switch to a new one.  Even in the middle of reading
                                706                 :                :  * a page we could have to dump the cached page and switch to a new TLI.
                                707                 :                :  *
                                708                 :                :  * Because of this, callers MAY NOT assume that currTLI is the timeline that
                                709                 :                :  * will be in a page's xlp_tli; the page may begin on an older timeline or we
                                710                 :                :  * might be reading from historical timeline data on a segment that's been
                                711                 :                :  * copied to a new timeline.
                                712                 :                :  *
                                713                 :                :  * The caller must also make sure it doesn't read past the current replay
                                714                 :                :  * position (using GetXLogReplayRecPtr) if executing in recovery, so it
                                715                 :                :  * doesn't fail to notice that the current timeline became historical.
                                716                 :                :  */
                                717                 :                : void
  891 rhaas@postgresql.org      718                 :          51165 : XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage,
                                719                 :                :                           uint32 wantLength, TimeLineID currTLI)
                                720                 :                : {
 1070 tmunro@postgresql.or      721                 :          51165 :     const XLogRecPtr lastReadPage = (state->seg.ws_segno *
                                722                 :          51165 :                                      state->segcxt.ws_segsize + state->segoff);
                                723                 :                : 
 2580 simon@2ndQuadrant.co      724   [ +  -  -  + ]:          51165 :     Assert(wantPage != InvalidXLogRecPtr && wantPage % XLOG_BLCKSZ == 0);
                                725         [ -  + ]:          51165 :     Assert(wantLength <= XLOG_BLCKSZ);
                                726   [ -  +  -  - ]:          51165 :     Assert(state->readLen == 0 || state->readLen <= XLOG_BLCKSZ);
  891 rhaas@postgresql.org      727         [ -  + ]:          51165 :     Assert(currTLI != 0);
                                728                 :                : 
                                729                 :                :     /*
                                730                 :                :      * If the desired page is currently read in and valid, we have nothing to
                                731                 :                :      * do.
                                732                 :                :      *
                                733                 :                :      * The caller should've ensured that it didn't previously advance readOff
                                734                 :                :      * past the valid limit of this timeline, so it doesn't matter if the
                                735                 :                :      * current TLI has since become historical.
                                736                 :                :      */
 2580 simon@2ndQuadrant.co      737         [ +  + ]:          51165 :     if (lastReadPage == wantPage &&
 1070 tmunro@postgresql.or      738         [ -  + ]:           2162 :         state->readLen != 0 &&
 2524 bruce@momjian.us          739         [ #  # ]:UBC           0 :         lastReadPage + state->readLen >= wantPage + Min(wantLength, XLOG_BLCKSZ - 1))
 2580 simon@2ndQuadrant.co      740                 :              0 :         return;
                                741                 :                : 
                                742                 :                :     /*
                                743                 :                :      * If we're reading from the current timeline, it hasn't become historical
                                744                 :                :      * and the page we're reading is after the last page read, we can again
                                745                 :                :      * just carry on. (Seeking backwards requires a check to make sure the
                                746                 :                :      * older page isn't on a prior timeline).
                                747                 :                :      *
                                748                 :                :      * currTLI might've become historical since the caller obtained the value,
                                749                 :                :      * but the caller is required not to read past the flush limit it saw at
                                750                 :                :      * the time it looked up the timeline. There's nothing we can do about it
                                751                 :                :      * if StartupXLOG() renames it to .partial concurrently.
                                752                 :                :      */
  891 rhaas@postgresql.org      753   [ +  +  +  + ]:CBC       51165 :     if (state->currTLI == currTLI && wantPage >= lastReadPage)
                                754                 :                :     {
 2580 simon@2ndQuadrant.co      755         [ -  + ]:          48078 :         Assert(state->currTLIValidUntil == InvalidXLogRecPtr);
                                756                 :          48078 :         return;
                                757                 :                :     }
                                758                 :                : 
                                759                 :                :     /*
                                760                 :                :      * If we're just reading pages from a previously validated historical
                                761                 :                :      * timeline and the timeline we're reading from is valid until the end of
                                762                 :                :      * the current segment we can just keep reading.
                                763                 :                :      */
                                764         [ +  + ]:           3087 :     if (state->currTLIValidUntil != InvalidXLogRecPtr &&
  891 rhaas@postgresql.org      765         [ +  - ]:           1697 :         state->currTLI != currTLI &&
 2580 simon@2ndQuadrant.co      766         [ +  - ]:           1697 :         state->currTLI != 0 &&
 1664 alvherre@alvh.no-ip.      767                 :           1697 :         ((wantPage + wantLength) / state->segcxt.ws_segsize) <
                                768         [ +  + ]:           1697 :         (state->currTLIValidUntil / state->segcxt.ws_segsize))
 2580 simon@2ndQuadrant.co      769                 :           1694 :         return;
                                770                 :                : 
                                771                 :                :     /*
                                772                 :                :      * If we reach this point we're either looking up a page for random
                                773                 :                :      * access, the current timeline just became historical, or we're reading
                                774                 :                :      * from a new segment containing a timeline switch. In all cases we need
                                775                 :                :      * to determine the newest timeline on the segment.
                                776                 :                :      *
                                777                 :                :      * If it's the current timeline we can just keep reading from here unless
                                778                 :                :      * we detect a timeline switch that makes the current timeline historical.
                                779                 :                :      * If it's a historical timeline we can read all the segment on the newest
                                780                 :                :      * timeline because it contains all the old timelines' data too. So only
                                781                 :                :      * one switch check is required.
                                782                 :                :      */
                                783                 :                :     {
                                784                 :                :         /*
                                785                 :                :          * We need to re-read the timeline history in case it's been changed
                                786                 :                :          * by a promotion or replay from a cascaded replica.
                                787                 :                :          */
  891 rhaas@postgresql.org      788                 :           1393 :         List       *timelineHistory = readTimeLineHistory(currTLI);
                                789                 :                :         XLogRecPtr  endOfSegment;
                                790                 :                : 
 1664 alvherre@alvh.no-ip.      791                 :           1393 :         endOfSegment = ((wantPage / state->segcxt.ws_segsize) + 1) *
                                792                 :           1393 :             state->segcxt.ws_segsize - 1;
                                793         [ -  + ]:           1393 :         Assert(wantPage / state->segcxt.ws_segsize ==
                                794                 :                :                endOfSegment / state->segcxt.ws_segsize);
                                795                 :                : 
                                796                 :                :         /*
                                797                 :                :          * Find the timeline of the last LSN on the segment containing
                                798                 :                :          * wantPage.
                                799                 :                :          */
 2580 simon@2ndQuadrant.co      800                 :           1393 :         state->currTLI = tliOfPointInHistory(endOfSegment, timelineHistory);
                                801                 :           1393 :         state->currTLIValidUntil = tliSwitchPoint(state->currTLI, timelineHistory,
                                802                 :                :                                                   &state->nextTLI);
                                803                 :                : 
                                804   [ +  +  -  + ]:           1393 :         Assert(state->currTLIValidUntil == InvalidXLogRecPtr ||
                                805                 :                :                wantPage + wantLength < state->currTLIValidUntil);
                                806                 :                : 
                                807                 :           1393 :         list_free_deep(timelineHistory);
                                808                 :                : 
                                809         [ -  + ]:           1393 :         elog(DEBUG3, "switched to timeline %u valid until %X/%X",
                                810                 :                :              state->currTLI,
                                811                 :                :              LSN_FORMAT_ARGS(state->currTLIValidUntil));
                                812                 :                :     }
                                813                 :                : }
                                814                 :                : 
                                815                 :                : /* XLogReaderRoutine->segment_open callback for local pg_wal files */
                                816                 :                : void
 1437 alvherre@alvh.no-ip.      817                 :            923 : wal_segment_open(XLogReaderState *state, XLogSegNo nextSegNo,
                                818                 :                :                  TimeLineID *tli_p)
                                819                 :                : {
 1602                           820                 :            923 :     TimeLineID  tli = *tli_p;
                                821                 :                :     char        path[MAXPGPATH];
                                822                 :                : 
 1432                           823                 :            923 :     XLogFilePath(path, tli, nextSegNo, state->segcxt.ws_segsize);
                                824                 :            923 :     state->seg.ws_file = BasicOpenFile(path, O_RDONLY | PG_BINARY);
                                825         [ +  - ]:            923 :     if (state->seg.ws_file >= 0)
                                826                 :            923 :         return;
                                827                 :                : 
 1602 alvherre@alvh.no-ip.      828         [ #  # ]:UBC           0 :     if (errno == ENOENT)
                                829         [ #  # ]:              0 :         ereport(ERROR,
                                830                 :                :                 (errcode_for_file_access(),
                                831                 :                :                  errmsg("requested WAL segment %s has already been removed",
                                832                 :                :                         path)));
                                833                 :                :     else
                                834         [ #  # ]:              0 :         ereport(ERROR,
                                835                 :                :                 (errcode_for_file_access(),
                                836                 :                :                  errmsg("could not open file \"%s\": %m",
                                837                 :                :                         path)));
                                838                 :                : }
                                839                 :                : 
                                840                 :                : /* stock XLogReaderRoutine->segment_close callback */
                                841                 :                : void
 1437 alvherre@alvh.no-ip.      842                 :CBC        9827 : wal_segment_close(XLogReaderState *state)
                                843                 :                : {
                                844                 :           9827 :     close(state->seg.ws_file);
                                845                 :                :     /* need to check errno? */
                                846                 :           9827 :     state->seg.ws_file = -1;
                                847                 :           9827 : }
                                848                 :                : 
                                849                 :                : /*
                                850                 :                :  * XLogReaderRoutine->page_read callback for reading local xlog files
                                851                 :                :  *
                                852                 :                :  * Public because it would likely be very helpful for someone writing another
                                853                 :                :  * output method outside walsender, e.g. in a bgworker.
                                854                 :                :  *
                                855                 :                :  * TODO: The walsender has its own version of this, but it relies on the
                                856                 :                :  * walsender's latch being set whenever WAL is flushed. No such infrastructure
                                857                 :                :  * exists for normal backends, so we have to do a check/sleep/repeat style of
                                858                 :                :  * loop for now.
                                859                 :                :  */
                                860                 :                : int
 1070 tmunro@postgresql.or      861                 :          21280 : read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                862                 :                :                      int reqLen, XLogRecPtr targetRecPtr, char *cur_page)
                                863                 :                : {
  737 jdavis@postgresql.or      864                 :          21280 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                865                 :                :                                      targetRecPtr, cur_page, true);
                                866                 :                : }
                                867                 :                : 
                                868                 :                : /*
                                869                 :                :  * Same as read_local_xlog_page except that it doesn't wait for future WAL
                                870                 :                :  * to be available.
                                871                 :                :  */
                                872                 :                : int
                                873                 :           3826 : read_local_xlog_page_no_wait(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                874                 :                :                              int reqLen, XLogRecPtr targetRecPtr,
                                875                 :                :                              char *cur_page)
                                876                 :                : {
                                877                 :           3826 :     return read_local_xlog_page_guts(state, targetPagePtr, reqLen,
                                878                 :                :                                      targetRecPtr, cur_page, false);
                                879                 :                : }
                                880                 :                : 
                                881                 :                : /*
                                882                 :                :  * Implementation of read_local_xlog_page and its no wait version.
                                                    :                :  *
                                                    :                :  * Reads at most one XLOG_BLCKSZ-sized block starting at targetPagePtr into
                                                    :                :  * cur_page with WALRead(), after working out how far WAL can currently be
                                                    :                :  * read (read_upto) and which timeline to read it from.
                                                    :                :  *
                                                    :                :  * On the current timeline, when targetPagePtr + reqLen lies beyond
                                                    :                :  * read_upto: with wait_for_wal true the loop checks for interrupts, sleeps
                                                    :                :  * and retries; with wait_for_wal false it sets end_of_wal in
                                                    :                :  * state->private_data (accessed as a ReadLocalXLogPageNoWaitPrivate) and
                                                    :                :  * stops looping.  On a historical timeline read_upto is taken from
                                                    :                :  * state->currTLIValidUntil and no waiting is done.
                                                    :                :  *
                                                    :                :  * Returns the number of valid bytes placed in cur_page, or -1 when neither
                                                    :                :  * a full block nor reqLen bytes are available below read_upto.  A failed
                                                    :                :  * WALRead() is handed to WALReadRaiseError().  targetRecPtr is not
                                                    :                :  * referenced in this function.
                                883                 :                :  */
                                884                 :                : static int
                                885                 :          25106 : read_local_xlog_page_guts(XLogReaderState *state, XLogRecPtr targetPagePtr,
                                886                 :                :                           int reqLen, XLogRecPtr targetRecPtr,
                                887                 :                :                           char *cur_page, bool wait_for_wal)
                                888                 :                : {
                                889                 :                :     XLogRecPtr  read_upto,
                                890                 :                :                 loc;
                                891                 :                :     TimeLineID  tli;    /* timeline handed to WALRead() */
                                892                 :                :     int         count;
                                893                 :                :     WALReadError errinfo;
                                894                 :                :     TimeLineID  currTLI;    /* timeline reported along with read_upto */
                                895                 :                : 
 3007 simon@2ndQuadrant.co      896                 :          25106 :     loc = targetPagePtr + reqLen;   /* end of the requested byte range */
                                897                 :                : 
                                898                 :                :     /* Loop waiting for xlog to be available if necessary */
                                899                 :                :     while (1)
                                900                 :                :     {
                                901                 :                :         /*
                                902                 :                :          * Determine the limit of xlog we can currently read to, and what the
                                903                 :                :          * most recent timeline is.
                                904                 :                :          */
 2902 alvherre@alvh.no-ip.      905         [ +  + ]:          25787 :         if (!RecoveryInProgress())
  891 rhaas@postgresql.org      906                 :          25006 :             read_upto = GetFlushRecPtr(&currTLI);
                                907                 :                :         else
                                908                 :            781 :             read_upto = GetXLogReplayRecPtr(&currTLI);
                                909                 :          25787 :         tli = currTLI;  /* default; overridden below on a historical timeline */
                                910                 :                : 
                                911                 :                :         /*
                                912                 :                :          * Check which timeline to get the record from.
                                913                 :                :          *
                                914                 :                :          * We have to do it each time through the loop because if we're in
                                915                 :                :          * recovery as a cascading standby, the current timeline might've
                                916                 :                :          * become historical. We can't rely on RecoveryInProgress() because in
                                917                 :                :          * a standby configuration like
                                918                 :                :          *
                                919                 :                :          * A => B => C
                                920                 :                :          *
                                921                 :                :          * if we're a logical decoding session on C, and B gets promoted, our
                                922                 :                :          * timeline will change while we remain in recovery.
                                923                 :                :          *
                                924                 :                :          * We can't just keep reading from the old timeline as the last WAL
                                925                 :                :          * archive in the timeline will get renamed to .partial by
                                926                 :                :          * StartupXLOG().
                                927                 :                :          *
                                928                 :                :          * If that happens after our caller determined the TLI but before we
                                929                 :                :          * actually read the xlog page, we might still try to read from the
                                930                 :                :          * old (now renamed) segment and fail. There's not much we can do
                                931                 :                :          * about this, but it can only happen when we're a leaf of a cascading
                                932                 :                :          * standby whose primary gets promoted while we're decoding, so a
                                933                 :                :          * one-off ERROR isn't too bad.
                                934                 :                :          */
                                935                 :          25787 :         XLogReadDetermineTimeline(state, targetPagePtr, reqLen, tli);
                                936                 :                : 
                                937         [ +  + ]:          25787 :         if (state->currTLI == currTLI)
                                938                 :                :         {
                                939                 :                : 
 2580 simon@2ndQuadrant.co      940         [ +  + ]:          24090 :             if (loc <= read_upto)
                                941                 :          23399 :                 break;
                                942                 :                : 
                                943                 :                :             /* If asked, let's not wait for future WAL. */
  737 jdavis@postgresql.or      944         [ +  + ]:            691 :             if (!wait_for_wal)
                                945                 :                :             {
                                946                 :                :                 ReadLocalXLogPageNoWaitPrivate *private_data;
                                947                 :                : 
                                948                 :                :                 /*
                                949                 :                :                  * Inform the caller of read_local_xlog_page_no_wait that the
                                950                 :                :                  * end of WAL has been reached.
                                951                 :                :                  */
  715                           952                 :             10 :                 private_data = (ReadLocalXLogPageNoWaitPrivate *)
                                953                 :                :                     state->private_data;
                                954                 :             10 :                 private_data->end_of_wal = true;
  737                           955                 :             10 :                 break;
                                956                 :                :             }
                                957                 :                : 
 2580 simon@2ndQuadrant.co      958         [ -  + ]:            681 :             CHECK_FOR_INTERRUPTS();
                                959                 :            681 :             pg_usleep(1000L);   /* pause, then re-evaluate read_upto on the next pass */
                                960                 :                :         }
                                961                 :                :         else
                                962                 :                :         {
                                963                 :                :             /*
                                964                 :                :              * We're on a historical timeline, so limit reading to the switch
                                965                 :                :              * point where we moved to the next timeline.
                                966                 :                :              *
                                967                 :                :              * We don't need to GetFlushRecPtr or GetXLogReplayRecPtr. We know
                                968                 :                :              * about the new timeline, so we must've received past the end of
                                969                 :                :              * it.
                                970                 :                :              */
                                971                 :           1697 :             read_upto = state->currTLIValidUntil;
                                972                 :                : 
                                973                 :                :             /*
                                974                 :                :              * Setting tli to our wanted record's TLI is slightly wrong; the
                                975                 :                :              * page might begin on an older timeline if it contains a timeline
                                976                 :                :              * switch, since its xlog segment will have been copied from the
                                977                 :                :              * prior timeline. This is pretty harmless though, as nothing
                                978                 :                :              * cares so long as the timeline doesn't go backwards.  We should
                                979                 :                :              * read the page header instead; FIXME someday.
                                980                 :                :              */
 1602 alvherre@alvh.no-ip.      981                 :           1697 :             tli = state->currTLI;
                                982                 :                : 
                                983                 :                :             /* No need to wait on a historical timeline */
 2580 simon@2ndQuadrant.co      984                 :           1697 :             break;
                                985                 :                :         }
                                986                 :                :     }
                                987                 :                : 
 2937 alvherre@alvh.no-ip.      988         [ +  + ]:          25106 :     if (targetPagePtr + XLOG_BLCKSZ <= read_upto)
                                989                 :                :     {
                                990                 :                :         /*
                                991                 :                :          * more than one block available; read only that block, have caller
                                992                 :                :          * come back if they need more.
                                993                 :                :          */
 3007 simon@2ndQuadrant.co      994                 :          24319 :         count = XLOG_BLCKSZ;
                                995                 :                :     }
 2937 alvherre@alvh.no-ip.      996         [ +  + ]:            787 :     else if (targetPagePtr + reqLen > read_upto)
                                997                 :                :     {
                                998                 :                :         /* not enough data there */
 1070 tmunro@postgresql.or      999                 :             10 :         return -1;
                               1000                 :                :     }
                               1001                 :                :     else
                               1002                 :                :     {
                               1003                 :                :         /* enough bytes available to satisfy the request */
 2937 alvherre@alvh.no-ip.     1004                 :            777 :         count = read_upto - targetPagePtr;
                               1005                 :                :     }
                               1006                 :                : 
   58 jdavis@postgresql.or     1007         [ -  + ]:GNC       25096 :     if (!WALRead(state, cur_page, targetPagePtr, count, tli,
                               1008                 :                :                  &errinfo))
 1602 alvherre@alvh.no-ip.     1009                 :UBC           0 :         WALReadRaiseError(&errinfo);
                               1010                 :                : 
                               1011                 :                :     /* number of valid bytes in the buffer */
 1070 tmunro@postgresql.or     1012                 :CBC       25096 :     return count;
                               1013                 :                : }
                               1014                 :                : 
                               1015                 :                : /*
                               1016                 :                :  * Backend-specific convenience code to handle read errors encountered by
                               1017                 :                :  * WALRead().
                                                    :                :  *
                                                    :                :  * Builds the segment file name from errinfo->wre_seg (ws_tli, ws_segno)
                                                    :                :  * and wal_segment_size, then reports according to errinfo->wre_read:
                                                    :                :  *
                                                    :                :  *   wre_read < 0:  copies wre_errno into errno for the %m in the message
                                                    :                :  *                  and raises an ERROR with errcode_for_file_access().
                                                    :                :  *   wre_read == 0: raises an ERROR with ERRCODE_DATA_CORRUPTED giving the
                                                    :                :  *                  offset and the read/requested byte counts.
                                                    :                :  *
                                                    :                :  * NOTE(review): when wre_read > 0 neither branch is taken and the function
                                                    :                :  * returns without reporting anything -- confirm that no caller relies on
                                                    :                :  * an error being raised in that case.
                               1018                 :                :  */
                               1019                 :                : void
 1602 alvherre@alvh.no-ip.     1020                 :UBC           0 : WALReadRaiseError(WALReadError *errinfo)
                               1021                 :                : {
                               1022                 :              0 :     WALOpenSegment *seg = &errinfo->wre_seg;
                               1023                 :                :     char        fname[MAXFNAMELEN];
                               1024                 :                : 
 1594 michael@paquier.xyz      1025                 :              0 :     XLogFileName(fname, seg->ws_tli, seg->ws_segno, wal_segment_size);
                               1026                 :                : 
 1602 alvherre@alvh.no-ip.     1027         [ #  # ]:              0 :     if (errinfo->wre_read < 0)
                               1028                 :                :     {
                               1029                 :              0 :         errno = errinfo->wre_errno; /* make the saved error visible to %m */
                               1030         [ #  # ]:              0 :         ereport(ERROR,
                               1031                 :                :                 (errcode_for_file_access(),
                               1032                 :                :                  errmsg("could not read from WAL segment %s, offset %d: %m",
                               1033                 :                :                         fname, errinfo->wre_off)));
                               1034                 :                :     }
                               1035         [ #  # ]:              0 :     else if (errinfo->wre_read == 0)
                               1036                 :                :     {
                               1037         [ #  # ]:              0 :         ereport(ERROR,
                               1038                 :                :                 (errcode(ERRCODE_DATA_CORRUPTED),
                               1039                 :                :                  errmsg("could not read from WAL segment %s, offset %d: read %d of %d",
                               1040                 :                :                         fname, errinfo->wre_off, errinfo->wre_read,
                               1041                 :                :                         errinfo->wre_req)));
                               1042                 :                :     }
                               1043                 :              0 : }
        

Generated by: LCOV version 2.1-beta2-3-g6141622