LCOV - differential code coverage report
Current view: top level - src/backend/access/transam - xlogreader.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 75.7 % 773 585 5 31 90 62 19 244 12 310 103 239 4 5
Current Date: 2023-04-08 15:15:32 Functions: 96.6 % 29 28 1 16 1 11 1 15 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * xlogreader.c
       4                 :  *      Generic XLog reading facility
       5                 :  *
       6                 :  * Portions Copyright (c) 2013-2023, PostgreSQL Global Development Group
       7                 :  *
       8                 :  * IDENTIFICATION
       9                 :  *      src/backend/access/transam/xlogreader.c
      10                 :  *
      11                 :  * NOTES
      12                 :  *      See xlogreader.h for more notes on this facility.
      13                 :  *
      14                 :  *      This file is compiled as both front-end and backend code, so it
      15                 :  *      may not use ereport, server-defined static variables, etc.
      16                 :  *-------------------------------------------------------------------------
      17                 :  */
      18                 : #include "postgres.h"
      19                 : 
      20                 : #include <unistd.h>
      21                 : #ifdef USE_LZ4
      22                 : #include <lz4.h>
      23                 : #endif
      24                 : #ifdef USE_ZSTD
      25                 : #include <zstd.h>
      26                 : #endif
      27                 : 
      28                 : #include "access/transam.h"
      29                 : #include "access/xlog_internal.h"
      30                 : #include "access/xlogreader.h"
      31                 : #include "access/xlogrecord.h"
      32                 : #include "catalog/pg_control.h"
      33                 : #include "common/pg_lzcompress.h"
      34                 : #include "replication/origin.h"
      35                 : 
      36                 : #ifndef FRONTEND
      37                 : #include "miscadmin.h"
      38                 : #include "pgstat.h"
      39                 : #include "utils/memutils.h"
      40                 : #else
      41                 : #include "common/logging.h"
      42                 : #endif
      43                 : 
      44                 : static void report_invalid_record(XLogReaderState *state, const char *fmt,...)
      45                 :             pg_attribute_printf(2, 3);
      46                 : static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
      47                 : static int  ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr,
      48                 :                              int reqLen);
      49                 : static void XLogReaderInvalReadState(XLogReaderState *state);
      50                 : static XLogPageReadResult XLogDecodeNextRecord(XLogReaderState *state, bool nonblocking);
      51                 : static bool ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
      52                 :                                   XLogRecPtr PrevRecPtr, XLogRecord *record, bool randAccess);
      53                 : static bool ValidXLogRecord(XLogReaderState *state, XLogRecord *record,
      54                 :                             XLogRecPtr recptr);
      55                 : static void ResetDecoder(XLogReaderState *state);
      56                 : static void WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt,
      57                 :                                int segsize, const char *waldir);
      58                 : 
      59                 : /* size of the buffer allocated for error message. */
      60                 : #define MAX_ERRORMSG_LEN 1000
      61                 : 
      62                 : /*
      63                 :  * Default size; large enough that typical users of XLogReader won't often need
      64                 :  * to use the 'oversized' memory allocation code path.
      65                 :  */
      66                 : #define DEFAULT_DECODE_BUFFER_SIZE (64 * 1024)
      67                 : 
      68                 : /*
      69                 :  * Construct a string in state->errormsg_buf explaining what's wrong with
      70                 :  * the current record being read.
      71                 :  */
      72                 : static void
      73 CBC         188 : report_invalid_record(XLogReaderState *state, const char *fmt,...)
      74                 : {
      75                 :     va_list     args;
      76                 : 
      77             188 :     fmt = _(fmt);
      78                 : 
      79             188 :     va_start(args, fmt);
      80             188 :     vsnprintf(state->errormsg_buf, MAX_ERRORMSG_LEN, fmt, args);
      81             188 :     va_end(args);
      82                 : 
      83             188 :     state->errormsg_deferred = true;
      84             188 : }
      85                 : 
      86                 : /*
      87                 :  * Set the size of the decoding buffer.  A pointer to a caller supplied memory
      88                 :  * region may also be passed in, in which case non-oversized records will be
      89                 :  * decoded there.
      90                 :  */
      91                 : void
      92            1176 : XLogReaderSetDecodeBuffer(XLogReaderState *state, void *buffer, size_t size)
      93                 : {
      94            1176 :     Assert(state->decode_buffer == NULL);
      95                 : 
      96            1176 :     state->decode_buffer = buffer;
      97            1176 :     state->decode_buffer_size = size;
      98            1176 :     state->decode_buffer_tail = buffer;
      99            1176 :     state->decode_buffer_head = buffer;
     100            1176 : }
     101                 : 
     102                 : /*
     103                 :  * Allocate and initialize a new XLogReader.
     104                 :  *
     105                 :  * Returns NULL if the xlogreader couldn't be allocated.
     106                 :  */
     107                 : XLogReaderState *
     108            2624 : XLogReaderAllocate(int wal_segment_size, const char *waldir,
     109                 :                    XLogReaderRoutine *routine, void *private_data)
     110                 : {
     111                 :     XLogReaderState *state;
     112                 : 
     113                 :     state = (XLogReaderState *)
     114            2624 :         palloc_extended(sizeof(XLogReaderState),
     115                 :                         MCXT_ALLOC_NO_OOM | MCXT_ALLOC_ZERO);
     116            2624 :     if (!state)
     117 UBC           0 :         return NULL;
     118                 : 
     119                 :     /* initialize caller-provided support functions */
     120 CBC        2624 :     state->routine = *routine;
     121                 : 
     122                 :     /*
     123                 :      * Permanently allocate readBuf.  We do it this way, rather than just
     124                 :      * making a static array, for two reasons: (1) no need to waste the
     125                 :      * storage in most instantiations of the backend; (2) a static char array
     126                 :      * isn't guaranteed to have any particular alignment, whereas
     127                 :      * palloc_extended() will provide MAXALIGN'd storage.
     128                 :      */
     129            2624 :     state->readBuf = (char *) palloc_extended(XLOG_BLCKSZ,
     130                 :                                               MCXT_ALLOC_NO_OOM);
     131            2624 :     if (!state->readBuf)
     132                 :     {
     133 UBC           0 :         pfree(state);
     134               0 :         return NULL;
     135                 :     }
     136                 : 
     137                 :     /* Initialize segment info. */
     138 CBC        2624 :     WALOpenSegmentInit(&state->seg, &state->segcxt, wal_segment_size,
     139                 :                        waldir);
     140                 : 
     141                 :     /* system_identifier initialized to zeroes above */
     142            2624 :     state->private_data = private_data;
     143                 :     /* ReadRecPtr, EndRecPtr and readLen initialized to zeroes above */
     144            2624 :     state->errormsg_buf = palloc_extended(MAX_ERRORMSG_LEN + 1,
     145                 :                                           MCXT_ALLOC_NO_OOM);
     146            2624 :     if (!state->errormsg_buf)
     147                 :     {
     148 UBC           0 :         pfree(state->readBuf);
     149               0 :         pfree(state);
     150               0 :         return NULL;
     151                 :     }
     152 CBC        2624 :     state->errormsg_buf[0] = '\0';
     153                 : 
     154                 :     /*
     155                 :      * Allocate an initial readRecordBuf of minimal size, which can later be
     156                 :      * enlarged if necessary.
     157                 :      */
     158            2624 :     if (!allocate_recordbuf(state, 0))
     159                 :     {
     160 UBC           0 :         pfree(state->errormsg_buf);
     161               0 :         pfree(state->readBuf);
     162               0 :         pfree(state);
     163               0 :         return NULL;
     164                 :     }
     165                 : 
     166 CBC        2624 :     return state;
     167                 : }
     168                 : 
     169                 : void
     170            2257 : XLogReaderFree(XLogReaderState *state)
     171                 : {
     172            2257 :     if (state->seg.ws_file != -1)
     173            1055 :         state->routine.segment_close(state);
     174                 : 
     175            2257 :     if (state->decode_buffer && state->free_decode_buffer)
     176            2197 :         pfree(state->decode_buffer);
     177                 : 
     178            2257 :     pfree(state->errormsg_buf);
     179            2257 :     if (state->readRecordBuf)
     180            2257 :         pfree(state->readRecordBuf);
     181            2257 :     pfree(state->readBuf);
     182            2257 :     pfree(state);
     183            2257 : }
     184                 : 
     185                 : /*
     186                 :  * Allocate readRecordBuf to fit a record of at least the given length.
     187                 :  * Returns true if successful, false if out of memory.
     188                 :  *
     189                 :  * readRecordBufSize is set to the new buffer size.
     190                 :  *
     191                 :  * To avoid useless small increases, round its size to a multiple of
     192                 :  * XLOG_BLCKSZ, and make sure it's at least 5*Max(BLCKSZ, XLOG_BLCKSZ) to start
     193                 :  * with.  (That is enough for all "normal" records, but very large commit or
     194                 :  * abort records might need more space.)
     195                 :  */
     196                 : static bool
     197            2642 : allocate_recordbuf(XLogReaderState *state, uint32 reclength)
     198                 : {
     199            2642 :     uint32      newSize = reclength;
     200                 : 
     201            2642 :     newSize += XLOG_BLCKSZ - (newSize % XLOG_BLCKSZ);
     202            2642 :     newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
     203                 : 
     204                 : #ifndef FRONTEND
     205                 : 
     206                 :     /*
     207                 :      * Note that in very unlucky circumstances, the random data read from a
     208                 :      * recycled segment can cause this routine to be called with a size
     209                 :      * causing a hard failure at allocation.  For a standby, this would cause
     210                 :      * the instance to stop suddenly with a hard failure, preventing it from
     211                 :      * retrying WAL fetch from one of its sources, which could allow it to move
     212                 :      * on with replay without a manual restart. If the data comes from a past
     213                 :      * recycled segment and is still valid, then the allocation may succeed
     214                 :      * but record checks are going to fail so this would be short-lived.  If
     215                 :      * the allocation fails because of a memory shortage, then this is not a
     216                 :      * hard failure either per the guarantee given by MCXT_ALLOC_NO_OOM.
     217                 :      */
     218            2642 :     if (!AllocSizeIsValid(newSize))
     219 UBC           0 :         return false;
     220                 : 
     221                 : #endif
     222                 : 
     223 CBC        2642 :     if (state->readRecordBuf)
     224              18 :         pfree(state->readRecordBuf);
     225            2642 :     state->readRecordBuf =
     226            2642 :         (char *) palloc_extended(newSize, MCXT_ALLOC_NO_OOM);
     227            2642 :     if (state->readRecordBuf == NULL)
     228                 :     {
     229 UBC           0 :         state->readRecordBufSize = 0;
     230               0 :         return false;
     231                 :     }
     232 CBC        2642 :     state->readRecordBufSize = newSize;
     233            2642 :     return true;
     234                 : }
     235                 : 
     236                 : /*
     237                 :  * Initialize the passed segment structs.
     238                 :  */
     239                 : static void
     240            2624 : WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt,
     241                 :                    int segsize, const char *waldir)
     242                 : {
     243            2624 :     seg->ws_file = -1;
     244            2624 :     seg->ws_segno = 0;
     245            2624 :     seg->ws_tli = 0;
     246                 : 
     247            2624 :     segcxt->ws_segsize = segsize;
     248            2624 :     if (waldir)
     249 UBC           0 :         snprintf(segcxt->ws_dir, MAXPGPATH, "%s", waldir);
     250 CBC        2624 : }
     251                 : 
     252                 : /*
     253                 :  * Begin reading WAL at 'RecPtr'.
     254                 :  *
     255                 :  * 'RecPtr' should point to the beginning of a valid WAL record.  Pointing at
     256                 :  * the beginning of a page is also OK, if there is a new record right after
     257                 :  * the page header, i.e. not a continuation.
     258                 :  *
     259                 :  * This does not make any attempt to read the WAL yet, and hence cannot fail.
     260                 :  * If the starting address is not correct, the first call to XLogReadRecord()
     261                 :  * will error out.
     262                 :  */
     263                 : void
     264            3709 : XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr)
     265                 : {
     266            3709 :     Assert(!XLogRecPtrIsInvalid(RecPtr));
     267                 : 
     268            3709 :     ResetDecoder(state);
     269                 : 
     270                 :     /* Begin at the passed-in record pointer. */
     271            3709 :     state->EndRecPtr = RecPtr;
     272            3709 :     state->NextRecPtr = RecPtr;
     273            3709 :     state->ReadRecPtr = InvalidXLogRecPtr;
     274            3709 :     state->DecodeRecPtr = InvalidXLogRecPtr;
     275            3709 : }
     276                 : 
     277                 : /*
     278                 :  * Release the last record that was returned by XLogNextRecord(), if any, to
     279                 :  * free up space.  Returns the LSN past the end of the record.
     280                 :  */
     281                 : XLogRecPtr
     282        10026716 : XLogReleasePreviousRecord(XLogReaderState *state)
     283                 : {
     284                 :     DecodedXLogRecord *record;
     285                 :     XLogRecPtr      next_lsn;
     286                 : 
     287        10026716 :     if (!state->record)
     288         5017120 :         return InvalidXLogRecPtr;
     289                 : 
     290                 :     /*
     291                 :      * Remove it from the decoded record queue.  It must be the oldest item
     292                 :      * decoded, decode_queue_head.
     293                 :      */
     294         5009596 :     record = state->record;
     295         5009596 :     next_lsn = record->next_lsn;
     296         5009596 :     Assert(record == state->decode_queue_head);
     297         5009596 :     state->record = NULL;
     298         5009596 :     state->decode_queue_head = record->next;
     299                 : 
     300                 :     /* It might also be the newest item decoded, decode_queue_tail. */
     301         5009596 :     if (state->decode_queue_tail == record)
     302         2532526 :         state->decode_queue_tail = NULL;
     303                 : 
     304                 :     /* Release the space. */
     305         5009596 :     if (unlikely(record->oversized))
     306                 :     {
     307                 :         /* It's not in the decode buffer, so free it to release space. */
     308              10 :         pfree(record);
     309                 :     }
     310                 :     else
     311                 :     {
     312                 :         /* It must be the head (oldest) record in the decode buffer. */
     313         5009586 :         Assert(state->decode_buffer_head == (char *) record);
     314                 : 
     315                 :         /*
     316                 :          * We need to update head to point to the next record that is in the
     317                 :          * decode buffer, if any, being careful to skip oversized ones
     318                 :          * (they're not in the decode buffer).
     319                 :          */
     320         5009586 :         record = record->next;
     321         5009586 :         while (unlikely(record && record->oversized))
     322 UBC           0 :             record = record->next;
     323                 : 
     324 CBC     5009586 :         if (record)
     325                 :         {
     326                 :             /* Adjust head to release space up to the next record. */
     327         2477070 :             state->decode_buffer_head = (char *) record;
     328                 :         }
     329                 :         else
     330                 :         {
     331                 :             /*
     332                 :              * Otherwise we might as well just reset head and tail to the
     333                 :              * start of the buffer space, because we're empty.  This means
     334                 :              * we'll keep overwriting the same piece of memory if we're not
     335                 :              * doing any prefetching.
     336                 :              */
     337         2532516 :             state->decode_buffer_head = state->decode_buffer;
     338         2532516 :             state->decode_buffer_tail = state->decode_buffer;
     339                 :         }
     340                 :     }
     341                 : 
     342         5009596 :     return next_lsn;
     343                 : }
     344                 : 
     345                 : /*
     346                 :  * Attempt to read an XLOG record.
     347                 :  *
     348                 :  * XLogBeginRead() or XLogFindNextRecord() and then XLogReadAhead() must be
     349                 :  * called before the first call to XLogNextRecord().  This function returns
     350                 :  * records and errors that were put into an internal queue by XLogReadAhead().
     351                 :  *
     352                 :  * On success, a record is returned.
     353                 :  *
     354                 :  * The returned record (or *errormsg) points to an internal buffer that's
     355                 :  * valid until the next call to XLogNextRecord.
     356                 :  */
     357                 : DecodedXLogRecord *
     358         5013282 : XLogNextRecord(XLogReaderState *state, char **errormsg)
     359                 : {
     360                 :     /* Release the last record returned by XLogNextRecord(). */
     361         5013282 :     XLogReleasePreviousRecord(state);
     362                 : 
     363         5013282 :     if (state->decode_queue_head == NULL)
     364                 :     {
     365             406 :         *errormsg = NULL;
     366             406 :         if (state->errormsg_deferred)
     367                 :         {
     368             182 :             if (state->errormsg_buf[0] != '\0')
     369             182 :                 *errormsg = state->errormsg_buf;
     370             182 :             state->errormsg_deferred = false;
     371                 :         }
     372                 : 
     373                 :         /*
     374                 :          * state->EndRecPtr is expected to have been set by the last call to
     375                 :          * XLogBeginRead() or XLogNextRecord(), and is the location of the
     376                 :          * error.
     377                 :          */
     378             406 :         Assert(!XLogRecPtrIsInvalid(state->EndRecPtr));
     379                 : 
     380             406 :         return NULL;
     381                 :     }
     382                 : 
     383                 :     /*
     384                 :      * Record this as the most recent record returned, so that we'll release
     385                 :      * it next time.  This also exposes it to the traditional
     386                 :      * XLogRecXXX(xlogreader) macros, which work with the decoder rather than
     387                 :      * the record for historical reasons.
     388                 :      */
     389         5012876 :     state->record = state->decode_queue_head;
     390                 : 
     391                 :     /*
     392                 :      * Update the pointers to the beginning and one-past-the-end of this
     393                 :      * record, again for the benefit of historical code that expected the
     394                 :      * decoder to track this rather than accessing these fields of the record
     395                 :      * itself.
     396                 :      */
     397         5012876 :     state->ReadRecPtr = state->record->lsn;
     398         5012876 :     state->EndRecPtr = state->record->next_lsn;
     399                 : 
     400         5012876 :     *errormsg = NULL;
     401                 : 
     402         5012876 :     return state->record;
     403                 : }
     404                 : 
     405                 : /*
     406                 :  * Attempt to read an XLOG record.
     407                 :  *
     408                 :  * XLogBeginRead() or XLogFindNextRecord() must be called before the first call
     409                 :  * to XLogReadRecord().
     410                 :  *
     411                 :  * If the page_read callback fails to read the requested data, NULL is
     412                 :  * returned.  The callback is expected to have reported the error; errormsg
     413                 :  * is set to NULL.
     414                 :  *
     415                 :  * If the reading fails for some other reason, NULL is also returned, and
     416                 :  * *errormsg is set to a string with details of the failure.
     417                 :  *
     418                 :  * The returned pointer (or *errormsg) points to an internal buffer that's
     419                 :  * valid until the next call to XLogReadRecord.
     420                 :  */
     421                 : XLogRecord *
     422         2506751 : XLogReadRecord(XLogReaderState *state, char **errormsg)
     423                 : {
     424                 :     DecodedXLogRecord *decoded;
     425                 : 
     426                 :     /*
     427                 :      * Release last returned record, if there is one.  We need to do this so
     428                 :      * that we can check for empty decode queue accurately.
     429                 :      */
     430         2506751 :     XLogReleasePreviousRecord(state);
     431                 : 
     432                 :     /*
     433                 :      * Call XLogReadAhead() in blocking mode to make sure there is something
     434                 :      * in the queue, though we don't use the result.
     435                 :      */
     436         2506751 :     if (!XLogReaderHasQueuedRecordOrError(state))
     437         2506751 :         XLogReadAhead(state, false /* nonblocking */ );
     438                 : 
     439                 :     /* Consume the head record or error. */
     440         2506630 :     decoded = XLogNextRecord(state, errormsg);
     441         2506630 :     if (decoded)
     442                 :     {
     443                 :         /*
     444                 :          * This function returns a pointer to the record's header, not the
     445                 :          * actual decoded record.  The caller will access the decoded record
     446                 :          * through the XLogRecGetXXX() macros, which reach the decoded
     447                 :          * record as xlogreader->record.
     448                 :          */
     449         2506419 :         Assert(state->record == decoded);
     450         2506419 :         return &decoded->header;
     451                 :     }
     452                 : 
     453             211 :     return NULL;
     454                 : }
     455                 : 
     456                 : /*
     457                 :  * Allocate space for a decoded record.  The only member of the returned
     458                 :  * object that is initialized is the 'oversized' flag, indicating that the
     459                 :  * decoded record wouldn't fit in the decode buffer and must eventually be
     460                 :  * freed explicitly.
     461                 :  *
     462                 :  * The caller is responsible for adjusting decode_buffer_tail with the real
     463                 :  * size after successfully decoding a record into this space.  This way, if
     464                 :  * decoding fails, then there is nothing to undo unless the 'oversized' flag
     465                 :  * was set and pfree() must be called.
     466                 :  *
     467                 :  * Return NULL if there is no space in the decode buffer and allow_oversized
     468                 :  * is false, or if memory allocation fails for an oversized buffer.
     469                 :  */
     470                 : static DecodedXLogRecord *
     471         5013073 : XLogReadRecordAlloc(XLogReaderState *state, size_t xl_tot_len, bool allow_oversized)
     472                 : {
     473         5013073 :     size_t      required_space = DecodeXLogRecordRequiredSpace(xl_tot_len);
     474         5013073 :     DecodedXLogRecord *decoded = NULL;
     475                 : 
     476                 :     /* Allocate a circular decode buffer if we don't have one already. */
     477         5013073 :     if (unlikely(state->decode_buffer == NULL))
     478                 :     {
     479            2368 :         if (state->decode_buffer_size == 0)
     480            1192 :             state->decode_buffer_size = DEFAULT_DECODE_BUFFER_SIZE;
     481            2368 :         state->decode_buffer = palloc(state->decode_buffer_size);
     482            2368 :         state->decode_buffer_head = state->decode_buffer;
     483            2368 :         state->decode_buffer_tail = state->decode_buffer;
     484            2368 :         state->free_decode_buffer = true;
     485                 :     }
     486                 : 
     487                 :     /* Try to allocate space in the circular decode buffer. */
     488         5013073 :     if (state->decode_buffer_tail >= state->decode_buffer_head)
     489                 :     {
     490                 :         /* Empty, or tail is to the right of head. */
     491         4991318 :         if (state->decode_buffer_tail + required_space <=
     492         4991318 :             state->decode_buffer + state->decode_buffer_size)
     493                 :         {
     494                 :             /* There is space between tail and end. */
     495         4990708 :             decoded = (DecodedXLogRecord *) state->decode_buffer_tail;
     496         4990708 :             decoded->oversized = false;
     497         4990708 :             return decoded;
     498                 :         }
     499             610 :         else if (state->decode_buffer + required_space <
     500             610 :                  state->decode_buffer_head)
     501                 :         {
     502                 :             /* There is space between start and head. */
     503             593 :             decoded = (DecodedXLogRecord *) state->decode_buffer;
     504             593 :             decoded->oversized = false;
     505             593 :             return decoded;
     506                 :         }
     507                 :     }
     508                 :     else
     509                 :     {
     510                 :         /* Tail is to the left of head. */
     511           21755 :         if (state->decode_buffer_tail + required_space <
     512           21755 :             state->decode_buffer_head)
     513                 :         {
     514                 :             /* There is space between tail and head. */
     515           21755 :             decoded = (DecodedXLogRecord *) state->decode_buffer_tail;
     516           21755 :             decoded->oversized = false;
     517           21755 :             return decoded;
     518                 :         }
     519                 :     }
     520                 : 
     521                 :     /* Not enough space in the decode buffer.  Are we allowed to allocate? */
     522              17 :     if (allow_oversized)
     523                 :     {
     524              13 :         decoded = palloc_extended(required_space, MCXT_ALLOC_NO_OOM);
     525              13 :         if (decoded == NULL)
     526 UBC           0 :             return NULL;
     527 CBC          13 :         decoded->oversized = true;
     528              13 :         return decoded;
     529                 :     }
     530                 : 
     531               4 :     return NULL;
     532                 : }
     533                 : /*
                         :  * Decode the WAL record located at state->NextRecPtr and, on success, append
                         :  * it to the reader's queue of decoded records.
                         :  *
                         :  * Returns XLREAD_SUCCESS once the record has been decoded and queued;
                         :  * XLREAD_WOULDBLOCK if a page read reports that it would block, or if
                         :  * nonblocking is true and no decode space could be found; and XLREAD_FAIL
                         :  * otherwise.  On the "err" exit, an oversized allocation is freed and the
                         :  * read state is invalidated via XLogReaderInvalReadState().
                         :  */
     534                 : static XLogPageReadResult
     535         5026864 : XLogDecodeNextRecord(XLogReaderState *state, bool nonblocking)
     536                 : {
     537                 :     XLogRecPtr  RecPtr;
     538                 :     XLogRecord *record;
     539                 :     XLogRecPtr  targetPagePtr;
     540                 :     bool        randAccess;
     541                 :     uint32      len,
     542                 :                 total_len;
     543                 :     uint32      targetRecOff;
     544                 :     uint32      pageHeaderSize;
     545                 :     bool        assembled;
     546                 :     bool        gotheader;
     547                 :     int         readOff;
     548                 :     DecodedXLogRecord *decoded;
     549                 :     char       *errormsg;       /* passed to DecodeXLogRecord, not read here */
     550                 : 
     551                 :     /*
     552                 :      * randAccess indicates whether to verify the previous-record pointer of
     553                 :      * the record we're reading.  We only do this if we're reading
     554                 :      * sequentially, which is what we initially assume.
     555                 :      */
     556         5026864 :     randAccess = false;
     557                 : 
     558                 :     /* reset error state */
     559         5026864 :     state->errormsg_buf[0] = '\0';
     560         5026864 :     decoded = NULL;
     561                 : 
     562         5026864 :     state->abortedRecPtr = InvalidXLogRecPtr;
     563         5026864 :     state->missingContrecPtr = InvalidXLogRecPtr;
     564                 : 
     565         5026864 :     RecPtr = state->NextRecPtr;
     566                 : 
     567         5026864 :     if (state->DecodeRecPtr != InvalidXLogRecPtr)
     568                 :     {
     569                 :         /* read the record after the one we just read */
     570                 : 
     571                 :         /*
     572                 :          * NextRecPtr is pointing to end+1 of the previous WAL record.  If
     573                 :          * we're at a page boundary, no more records can fit on the current
     574                 :          * page. We must skip over the page header, but we can't do that until
     575                 :          * we've read in the page, since the header size is variable.
     576                 :          */
     577                 :     }
     578                 :     else
     579                 :     {
     580                 :         /*
     581                 :          * Caller supplied a position to start at.
     582                 :          *
     583                 :          * In this case, NextRecPtr should already be pointing either to a
     584                 :          * valid record starting position or alternatively to the beginning of
     585                 :          * a page. See the header comments for XLogBeginRead.
     586                 :          */
     587 GNC        3655 :         Assert(RecPtr % XLOG_BLCKSZ == 0 || XRecOffIsValid(RecPtr));
     588 CBC        3655 :         randAccess = true;
     589 ECB             :     }
     590                 : /* Re-entered (with RecPtr moved to the next page) after an overwritten contrecord. */
     591 GIC     5026864 : restart:
     592 CBC     5026865 :     state->nonblocking = nonblocking;
     593         5026865 :     state->currRecPtr = RecPtr;
     594         5026865 :     assembled = false;
     595 ECB             : 
     596 GIC     5026865 :     targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ);
     597 CBC     5026865 :     targetRecOff = RecPtr % XLOG_BLCKSZ;
     598 ECB             : 
     599                 :     /*
     600                 :      * Read the page containing the record into state->readBuf. Request enough
     601                 :      * bytes to cover the whole record header, or at least the part of it that
     602                 :      * fits on the same page.
     603                 :      */
     604 GIC     5026865 :     readOff = ReadPageInternal(state, targetPagePtr,
     605 CBC     5026865 :                                Min(targetRecOff + SizeOfXLogRecord, XLOG_BLCKSZ));
     606         5026714 :     if (readOff == XLREAD_WOULDBLOCK)
     607           13224 :         return XLREAD_WOULDBLOCK;
     608         5013490 :     else if (readOff < 0)
     609             238 :         goto err;
     610 ECB             : 
     611                 :     /*
     612                 :      * ReadPageInternal always returns at least the page header, so we can
     613                 :      * examine it now.
     614                 :      */
     615 GIC     5013252 :     pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
     616 CBC     5013252 :     if (targetRecOff == 0)
     617 ECB             :     {
     618                 :         /*
     619                 :          * At page start, so skip over page header.
     620                 :          */
     621 GIC        5117 :         RecPtr += pageHeaderSize;
     622 CBC        5117 :         targetRecOff = pageHeaderSize;
     623 ECB             :     }
     624 GIC     5008135 :     else if (targetRecOff < pageHeaderSize)
     625 ECB             :     {
     626 UNC           0 :         report_invalid_record(state, "invalid record offset at %X/%X: expected at least %u, got %u",
     627               0 :                               LSN_FORMAT_ARGS(RecPtr),
     628                 :                               pageHeaderSize, targetRecOff);
     629 UBC           0 :         goto err;
     630                 :     }
     631 EUB             : 
     632 GIC     5013252 :     if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
     633                 :         targetRecOff == pageHeaderSize)
     634 ECB             :     {
     635 UIC           0 :         report_invalid_record(state, "contrecord is requested by %X/%X",
     636               0 :                               LSN_FORMAT_ARGS(RecPtr));
     637 UBC           0 :         goto err;
     638 EUB             :     }
     639                 : 
     640                 :     /* ReadPageInternal has verified the page header */
     641 GIC     5013252 :     Assert(pageHeaderSize <= readOff);
     642                 : 
     643 ECB             :     /*
     644                 :      * Read the record length.
     645                 :      *
     646                 :      * NB: Even though we use an XLogRecord pointer here, the whole record
     647                 :      * header might not fit on this page. xl_tot_len is the first field of the
     648                 :      * struct, so it must be on this page (the records are MAXALIGNed), but we
     649                 :      * cannot access any other fields until we've verified that we got the
     650                 :      * whole header.
     651                 :      */
     652 GIC     5013252 :     record = (XLogRecord *) (state->readBuf + RecPtr % XLOG_BLCKSZ);
     653         5013252 :     total_len = record->xl_tot_len;
     654 ECB             : 
     655                 :     /*
     656                 :      * If the whole record header is on this page, validate it immediately.
     657                 :      * Otherwise do just a basic sanity check on xl_tot_len, and validate the
     658                 :      * rest of the header after reading it from the next page.  The xl_tot_len
     659                 :      * check is necessary here to ensure that we enter the "Need to reassemble
     660                 :      * record" code path below; otherwise we might fail to apply
     661                 :      * ValidXLogRecordHeader at all.
     662                 :      */
     663 GIC     5013252 :     if (targetRecOff <= XLOG_BLCKSZ - SizeOfXLogRecord)
     664                 :     {
     665 CBC     5003337 :         if (!ValidXLogRecordHeader(state, RecPtr, state->DecodeRecPtr, record,
     666                 :                                    randAccess))
     667             179 :             goto err;
     668 GIC     5003158 :         gotheader = true;
     669 ECB             :     }
     670                 :     else
     671                 :     {
     672                 :         /* XXX: more validation should be done here */
     673 GIC        9915 :         if (total_len < SizeOfXLogRecord)
     674                 :         {
     675 LBC           0 :             report_invalid_record(state,
     676                 :                                   "invalid record length at %X/%X: expected at least %u, got %u",
     677 UBC           0 :                                   LSN_FORMAT_ARGS(RecPtr),
     678                 :                                   (uint32) SizeOfXLogRecord, total_len);
     679               0 :             goto err;
     680                 :         }
     681 GBC        9915 :         gotheader = false;
     682                 :     }
     683 ECB             : 
     684                 :     /*
     685                 :      * Find space to decode this record.  Don't allow oversized allocation if
     686                 :      * the caller requested nonblocking.  Otherwise, we *have* to try to
     687                 :      * decode the record now because the caller has nothing else to do, so
     688                 :      * allow an oversized record to be palloc'd if that turns out to be
     689                 :      * necessary.
     690                 :      */
     691 GIC     5013073 :     decoded = XLogReadRecordAlloc(state,
     692                 :                                   total_len,
     693 CBC     5013073 :                                   !nonblocking /* allow_oversized */ );
     694 GIC     5013073 :     if (decoded == NULL)
     695 ECB             :     {
     696                 :         /*
     697                 :          * There is no space in the decode buffer.  The caller should help
     698                 :          * with that problem by consuming some records.
     699                 :          */
     700 GIC           4 :         if (nonblocking)
     701               4 :             return XLREAD_WOULDBLOCK;
     702 ECB             : 
     703                 :         /* We failed to allocate memory for an oversized record. */
     704 UIC           0 :         report_invalid_record(state,
     705                 :                               "out of memory while trying to decode a record of length %u", total_len);
     706 UBC           0 :         goto err;
     707                 :     }
     708 EUB             : 
     709 GIC     5013069 :     len = XLOG_BLCKSZ - RecPtr % XLOG_BLCKSZ;
     710         5013069 :     if (total_len > len)
     711 ECB             :     {
     712                 :         /* Need to reassemble record */
     713                 :         char       *contdata;
     714                 :         XLogPageHeader pageHeader;
     715                 :         char       *buffer;
     716                 :         uint32      gotlen;
     717                 : 
     718 GIC       75444 :         assembled = true;
     719                 : 
     720 ECB             :         /*
     721                 :          * Enlarge readRecordBuf as needed.
     722                 :          */
     723 GIC       75444 :         if (total_len > state->readRecordBufSize &&
     724              18 :             !allocate_recordbuf(state, total_len))
     725 ECB             :         {
     726                 :             /* We treat this as a "bogus data" condition */
     727 UIC           0 :             report_invalid_record(state, "record length %u at %X/%X too long",
     728               0 :                                   total_len, LSN_FORMAT_ARGS(RecPtr));
     729 UBC           0 :             goto err;
     730 EUB             :         }
     731                 : 
     732                 :         /* Copy the first fragment of the record from the first page. */
     733 GIC       75444 :         memcpy(state->readRecordBuf,
     734           75444 :                state->readBuf + RecPtr % XLOG_BLCKSZ, len);
     735 CBC       75444 :         buffer = state->readRecordBuf + len;
     736           75444 :         gotlen = len;
     737 ECB             : 
     738                 :         do
     739                 :         {
     740                 :             /* Calculate pointer to beginning of next page */
     741 GIC       78233 :             targetPagePtr += XLOG_BLCKSZ;
     742                 : 
     743 ECB             :             /* Wait for the next page to become available */
     744 GIC       78233 :             readOff = ReadPageInternal(state, targetPagePtr,
     745           78233 :                                        Min(total_len - gotlen + SizeOfXLogShortPHD,
     746 ECB             :                                            XLOG_BLCKSZ));
     747                 : 
     748 GIC       78232 :             if (readOff == XLREAD_WOULDBLOCK)
     749             182 :                 return XLREAD_WOULDBLOCK;
     750 CBC       78050 :             else if (readOff < 0)
     751               3 :                 goto err;
     752 ECB             : 
     753 CBC       78047 :             Assert(SizeOfXLogShortPHD <= readOff);
     754                 : 
     755           78047 :             pageHeader = (XLogPageHeader) state->readBuf;
     756                 : 
     757 ECB             :             /*
     758                 :              * If we were expecting a continuation record and got an
     759                 :              * "overwrite contrecord" flag, that means the continuation record
     760                 :              * was overwritten with a different record.  Restart the read by
     761                 :              * assuming the address to read is the location where we found
     762                 :              * this flag; but keep track of the LSN of the record we were
     763                 :              * reading, for later verification.
     764                 :              */
     765 GIC       78047 :             if (pageHeader->xlp_info & XLP_FIRST_IS_OVERWRITE_CONTRECORD)
     766                 :             {
     767 CBC           1 :                 state->overwrittenRecPtr = RecPtr;
     768 GIC           1 :                 RecPtr = targetPagePtr;
     769 CBC           1 :                 goto restart;
     770 ECB             :             }
     771                 : 
     772                 :             /* Check that the continuation on next page looks valid */
     773 GIC       78046 :             if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
     774                 :             {
     775 LBC           0 :                 report_invalid_record(state,
     776                 :                                       "there is no contrecord flag at %X/%X",
     777 UBC           0 :                                       LSN_FORMAT_ARGS(RecPtr));
     778 UIC           0 :                 goto err;
     779 EUB             :             }
     780                 : 
     781                 :             /*
     782                 :              * Cross-check that xlp_rem_len agrees with how much of the record
     783                 :              * we expect there to be left.
     784                 :              */
     785 GIC       78046 :             if (pageHeader->xlp_rem_len == 0 ||
     786           78046 :                 total_len != (pageHeader->xlp_rem_len + gotlen))
     787 ECB             :             {
     788 LBC           0 :                 report_invalid_record(state,
     789                 :                                       "invalid contrecord length %u (expected %lld) at %X/%X",
     790 EUB             :                                       pageHeader->xlp_rem_len,
     791 UIC           0 :                                       ((long long) total_len) - gotlen,
     792               0 :                                       LSN_FORMAT_ARGS(RecPtr));
     793 UBC           0 :                 goto err;
     794 EUB             :             }
     795                 : 
     796                 :             /* Append the continuation from this page to the buffer */
     797 GIC       78046 :             pageHeaderSize = XLogPageHeaderSize(pageHeader);
     798                 :             /*
                         :              * NOTE(review): the result of the re-read below is not checked for
                         :              * XLREAD_WOULDBLOCK or a negative (failure) value before it is used.
                         :              */
     799 CBC       78046 :             if (readOff < pageHeaderSize)
     800 UIC           0 :                 readOff = ReadPageInternal(state, targetPagePtr,
     801 ECB             :                                            pageHeaderSize);
     802 EUB             : 
     803 GIC       78046 :             Assert(pageHeaderSize <= readOff);
     804                 : 
     805 CBC       78046 :             contdata = (char *) state->readBuf + pageHeaderSize;
     806 GIC       78046 :             len = XLOG_BLCKSZ - pageHeaderSize;
     807 CBC       78046 :             if (pageHeader->xlp_rem_len < len)
     808           75256 :                 len = pageHeader->xlp_rem_len;
     809 ECB             :             /*
                         :              * NOTE(review): likewise, the result of this re-read is not checked
                         :              * before the memcpy() that follows.
                         :              */
     810 CBC       78046 :             if (readOff < pageHeaderSize + len)
     811 UIC           0 :                 readOff = ReadPageInternal(state, targetPagePtr,
     812 LBC           0 :                                            pageHeaderSize + len);
     813 EUB             : 
     814 GBC       78046 :             memcpy(buffer, (char *) contdata, len);
     815 GIC       78046 :             buffer += len;
     816 CBC       78046 :             gotlen += len;
     817 ECB             : 
     818                 :             /* If we just reassembled the record header, validate it. */
     819 GIC       78046 :             if (!gotheader)
     820                 :             {
     821 CBC        9881 :                 record = (XLogRecord *) state->readRecordBuf;
     822 GIC        9881 :                 if (!ValidXLogRecordHeader(state, RecPtr, state->DecodeRecPtr,
     823 ECB             :                                            record, randAccess))
     824 LBC           0 :                     goto err;
     825 GIC        9881 :                 gotheader = true;
     826 EUB             :             }
     827 CBC       78046 :         } while (gotlen < total_len);
     828                 : 
     829           75257 :         Assert(gotheader);
     830                 : 
     831           75257 :         record = (XLogRecord *) state->readRecordBuf;
     832 GIC       75257 :         if (!ValidXLogRecord(state, record, RecPtr))
     833 LBC           0 :             goto err;
     834 ECB             :         /* The next record starts after the MAXALIGNed remainder on the last page read. */
     835 GBC       75257 :         pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) state->readBuf);
     836 GIC       75257 :         state->DecodeRecPtr = RecPtr;
     837 CBC       75257 :         state->NextRecPtr = targetPagePtr + pageHeaderSize
     838           75257 :             + MAXALIGN(pageHeader->xlp_rem_len);
     839 ECB             :     }
     840                 :     else
     841                 :     {
     842                 :         /* Wait for the record data to become available */
     843 GIC     4937625 :         readOff = ReadPageInternal(state, targetPagePtr,
     844         4937625 :                                    Min(targetRecOff + total_len, XLOG_BLCKSZ));
     845 CBC     4937625 :         if (readOff == XLREAD_WOULDBLOCK)
     846 LBC           0 :             return XLREAD_WOULDBLOCK;
     847 CBC     4937625 :         else if (readOff < 0)
     848 UBC           0 :             goto err;
     849 ECB             : 
     850 EUB             :         /* Record does not cross a page boundary */
     851 GIC     4937625 :         if (!ValidXLogRecord(state, record, RecPtr))
     852 UIC           0 :             goto err;
     853 ECB             : 
     854 GBC     4937625 :         state->NextRecPtr = RecPtr + MAXALIGN(total_len);
     855                 : 
     856 CBC     4937625 :         state->DecodeRecPtr = RecPtr;
     857                 :     }
     858 ECB             : 
     859                 :     /*
     860                 :      * Special processing if it's an XLOG SWITCH record
     861                 :      */
     862 GIC     5012882 :     if (record->xl_rmid == RM_XLOG_ID &&
     863           32993 :         (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH)
     864 ECB             :     {
     865                 :         /* Pretend it extends to end of segment */
     866 GIC         112 :         state->NextRecPtr += state->segcxt.ws_segsize - 1;
     867             112 :         state->NextRecPtr -= XLogSegmentOffset(state->NextRecPtr, state->segcxt.ws_segsize);
     868 ECB             :     }
     869                 :     /*
                         :      * NOTE(review): if DecodeXLogRecord() fails, the else branch below returns
                         :      * XLREAD_FAIL directly instead of going through "err", so an oversized
                         :      * 'decoded' is not pfree'd on that path and the read state is not
                         :      * invalidated -- confirm that this is intended.
                         :      */
     870 GIC     5012882 :     if (DecodeXLogRecord(state, decoded, record, RecPtr, &errormsg))
     871                 :     {
     872 ECB             :         /* Record the location of the next record. */
     873 GIC     5012882 :         decoded->next_lsn = state->NextRecPtr;
     874                 : 
     875 ECB             :         /*
     876                 :          * If it's in the decode buffer, mark the decode buffer space as
     877                 :          * occupied.
     878                 :          */
     879 GIC     5012882 :         if (!decoded->oversized)
     880                 :         {
     881 ECB             :             /* The new decode buffer head must be MAXALIGNed. */
     882 GIC     5012872 :             Assert(decoded->size == MAXALIGN(decoded->size));
     883         5012872 :             if ((char *) decoded == state->decode_buffer)
     884 CBC     2536389 :                 state->decode_buffer_tail = state->decode_buffer + decoded->size;
     885 ECB             :             else
     886 CBC     2476483 :                 state->decode_buffer_tail += decoded->size;
     887                 :         }
     888 ECB             : 
     889                 :         /* Insert it into the queue of decoded records. */
     890 GIC     5012882 :         Assert(state->decode_queue_tail != decoded);
     891         5012882 :         if (state->decode_queue_tail)
     892 CBC     2477076 :             state->decode_queue_tail->next = decoded;
     893         5012882 :         state->decode_queue_tail = decoded;
     894         5012882 :         if (!state->decode_queue_head)
     895         2535806 :             state->decode_queue_head = decoded;
     896         5012882 :         return XLREAD_SUCCESS;
     897 ECB             :     }
     898                 :     else
     899 UIC           0 :         return XLREAD_FAIL;
     900                 : 
     901 GBC         420 : err:
     902 GIC         420 :     if (assembled)
     903 ECB             :     {
     904                 :         /*
     905                 :          * We get here when a record that spans multiple pages needs to be
     906                 :          * assembled, but something went wrong -- perhaps a contrecord piece
     907                 :          * was lost.  If caller is WAL replay, it will know where the aborted
     908                 :          * record was and where to direct followup WAL to be written, marking
     909                 :          * the next piece with XLP_FIRST_IS_OVERWRITE_CONTRECORD, which will
     910                 :          * in turn signal downstream WAL consumers that the broken WAL record
     911                 :          * is to be ignored.
     912                 :          */
     913 GIC           3 :         state->abortedRecPtr = RecPtr;
     914               3 :         state->missingContrecPtr = targetPagePtr;
     915 ECB             : 
     916                 :         /*
     917                 :          * If we got here without reporting an error, report one now so that
     918                 :          * XLogPrefetcherReadRecord() doesn't bring us back a second time and
     919                 :          * clobber the above state.  Otherwise, the existing error takes
     920                 :          * precedence.
     921                 :          */
     922 GIC           3 :         if (!state->errormsg_buf[0])
     923               3 :             report_invalid_record(state,
     924 ECB             :                                   "missing contrecord at %X/%X",
     925 CBC           3 :                                   LSN_FORMAT_ARGS(RecPtr));
     926                 :     }
     927 ECB             : 
     928 GIC         420 :     if (decoded && decoded->oversized)
     929               2 :         pfree(decoded);
     930 ECB             : 
     931                 :     /*
     932                 :      * Invalidate the read state. We might read from a different source after
     933                 :      * failure.
     934                 :      */
     935 GIC         420 :     XLogReaderInvalReadState(state);
     936                 : 
     937 ECB             :     /*
     938                 :      * If an error was written to errormsg_buf, it'll be returned to the caller
     939                 :      * of XLogReadRecord() after all successfully decoded records from the
     940                 :      * read queue.
     941                 :      */
     942                 : 
     943 GIC         420 :     return XLREAD_FAIL;
     944                 : }
     945 ECB             : 
     946                 : /*
     947                 :  * Try to decode the next available record, and return it.  The record will
     948                 :  * also be returned to XLogNextRecord(), which must be called to 'consume'
     949                 :  * each record.
     950                 :  *
     951                 :  * If nonblocking is true, may return NULL due to lack of data or WAL decoding
     952                 :  * space.
                         :  *
                         :  * NULL is also returned, without attempting to decode, when
                         :  * state->errormsg_deferred is set, and whenever XLogDecodeNextRecord()
                         :  * reports anything other than XLREAD_SUCCESS.  On success the returned
                         :  * record is the one at the tail of the decode queue.
     953                 :  */
     954                 : DecodedXLogRecord *
     955 GIC     5026864 : XLogReadAhead(XLogReaderState *state, bool nonblocking)
     956                 : {
     957 ECB             :     XLogPageReadResult result;
     958                 :     /* Do not attempt to decode while errormsg_deferred is set. */
     959 GIC     5026864 :     if (state->errormsg_deferred)
     960 UIC           0 :         return NULL;
     961 ECB             : 
     962 GBC     5026864 :     result = XLogDecodeNextRecord(state, nonblocking);
     963 GIC     5026712 :     if (result == XLREAD_SUCCESS)
     964 ECB             :     {
     965 CBC     5012882 :         Assert(state->decode_queue_tail != NULL);
     966 GIC     5012882 :         return state->decode_queue_tail;
     967 ECB             :     }
     968                 :     /* Any result other than XLREAD_SUCCESS is reported to the caller as NULL. */
     969 GIC       13830 :     return NULL;
     970                 : }
     971 ECB             : 
     972                 : /*
     973                 :  * Read a single xlog page, containing at least the first reqLen bytes of
     974                 :  * valid data starting at pageptr, via the page_read() callback.
     975                 :  *
     976                 :  * Returns XLREAD_FAIL if the required page cannot be read for some
     977                 :  * reason; errormsg_buf is set in that case (unless the error occurs in the
     978                 :  * page_read callback).
     979                 :  *
     980                 :  * Returns XLREAD_WOULDBLOCK if the requested data can't be read without
     981                 :  * waiting.  This can be returned only if the installed page_read callback
     982                 :  * respects the state->nonblocking flag, and cannot read the requested data
     983                 :  * immediately.
     984                 :  *
     985                 :  * We fetch the page from a reader-local cache if we know we have the required
     986                 :  * data and if there hasn't been any error since caching the data.
     987                 :  */
     988                 : static int
     989 GIC    10042753 : ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
     990                 : {
     991 ECB             :     int         readLen;
     992                 :     uint32      targetPageOff;
     993                 :     XLogSegNo   targetSegNo;
     994                 :     XLogPageHeader hdr;
     995                 : 
     996 GIC    10042753 :     Assert((pageptr % XLOG_BLCKSZ) == 0);
     997                 : 
     998 CBC    10042753 :     XLByteToSeg(pageptr, targetSegNo, state->segcxt.ws_segsize);
     999 GIC    10042753 :     targetPageOff = XLogSegmentOffset(pageptr, state->segcxt.ws_segsize);
    1000 ECB             : 
    1001                 :     /* check whether we have all the requested data already */
    1002 GIC    10042753 :     if (targetSegNo == state->seg.ws_segno &&
    1003        10039861 :         targetPageOff == state->segoff && reqLen <= state->readLen)
    1004 CBC     9928596 :         return state->readLen;
    1005 ECB             : 
    1006                 :     /*
    1007                 :      * Invalidate contents of internal buffer before read attempt.  Just set
    1008                 :      * the length to 0, rather than a full XLogReaderInvalReadState(), so we
    1009                 :      * don't forget the segment we last successfully read.
    1010                 :      */
    1011 GIC      114157 :     state->readLen = 0;
    1012                 : 
    1013 ECB             :     /*
    1014                 :      * Data is not in our buffer.
    1015                 :      *
    1016                 :      * Every time we actually read the segment, even if we looked at parts of
    1017                 :      * it before, we need to do verification as the page_read callback might
    1018                 :      * now be rereading data from a different source.
    1019                 :      *
    1020                 :      * Whenever switching to a new WAL segment, we read the first page of the
    1021                 :      * file and validate its header, even if that's not where the target
    1022                 :      * record is.  This is so that we can check the additional identification
    1023                 :      * info that is present in the first page's "long" header.
    1024                 :      */
    1025 GIC      114157 :     if (targetSegNo != state->seg.ws_segno && targetPageOff != 0)
    1026                 :     {
    1027 CBC        2224 :         XLogRecPtr  targetSegmentPtr = pageptr - targetPageOff;
    1028                 : 
    1029            2224 :         readLen = state->routine.page_read(state, targetSegmentPtr, XLOG_BLCKSZ,
    1030                 :                                            state->currRecPtr,
    1031 ECB             :                                            state->readBuf);
    1032 GIC        2224 :         if (readLen == XLREAD_WOULDBLOCK)
    1033 UIC           0 :             return XLREAD_WOULDBLOCK;
    1034 CBC        2224 :         else if (readLen < 0)
    1035 UBC           0 :             goto err;
    1036 ECB             : 
    1037 EUB             :         /* we can be sure to have enough WAL available, we scrolled back */
    1038 GIC        2224 :         Assert(readLen == XLOG_BLCKSZ);
    1039                 : 
    1040 CBC        2224 :         if (!XLogReaderValidatePageHeader(state, targetSegmentPtr,
    1041                 :                                           state->readBuf))
    1042 LBC           0 :             goto err;
    1043                 :     }
    1044 EUB             : 
    1045                 :     /*
    1046                 :      * First, read the requested data length, but at least a short page header
    1047                 :      * so that we can validate it.
    1048                 :      */
    1049 GIC      114157 :     readLen = state->routine.page_read(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),
    1050                 :                                        state->currRecPtr,
    1051 ECB             :                                        state->readBuf);
    1052 GIC      114005 :     if (readLen == XLREAD_WOULDBLOCK)
    1053           13406 :         return XLREAD_WOULDBLOCK;
    1054 CBC      100599 :     else if (readLen < 0)
    1055             241 :         goto err;
    1056 ECB             : 
    1057 CBC      100358 :     Assert(readLen <= XLOG_BLCKSZ);
    1058                 : 
    1059 ECB             :     /* Do we have enough data to check the header length? */
    1060 GIC      100358 :     if (readLen <= SizeOfXLogShortPHD)
    1061 UIC           0 :         goto err;
    1062 ECB             : 
    1063 GBC      100358 :     Assert(readLen >= reqLen);
    1064                 : 
    1065 CBC      100358 :     hdr = (XLogPageHeader) state->readBuf;
    1066                 : 
    1067 ECB             :     /* still not enough */
    1068 GIC      100358 :     if (readLen < XLogPageHeaderSize(hdr))
    1069                 :     {
    1070 LBC           0 :         readLen = state->routine.page_read(state, pageptr, XLogPageHeaderSize(hdr),
    1071                 :                                            state->currRecPtr,
    1072 EUB             :                                            state->readBuf);
    1073 UIC           0 :         if (readLen == XLREAD_WOULDBLOCK)
    1074               0 :             return XLREAD_WOULDBLOCK;
    1075 UBC           0 :         else if (readLen < 0)
    1076               0 :             goto err;
    1077 EUB             :     }
    1078                 : 
    1079                 :     /*
    1080                 :      * Now that we know we have the full header, validate it.
    1081                 :      */
    1082 GIC      100358 :     if (!XLogReaderValidatePageHeader(state, pageptr, (char *) hdr))
    1083 UIC           0 :         goto err;
    1084 ECB             : 
    1085 EUB             :     /* update read state information */
    1086 GIC      100358 :     state->seg.ws_segno = targetSegNo;
    1087          100358 :     state->segoff = targetPageOff;
    1088 CBC      100358 :     state->readLen = readLen;
    1089 ECB             : 
    1090 CBC      100358 :     return readLen;
    1091                 : 
    1092             241 : err:
    1093 GIC         241 :     XLogReaderInvalReadState(state);
    1094 ECB             : 
    1095 CBC         241 :     return XLREAD_FAIL;
    1096                 : }
    1097 ECB             : 
    1098                 : /*
    1099                 :  * Invalidate the xlogreader's read state to force a re-read.
    1100                 :  */
    1101                 : static void
    1102 GIC         661 : XLogReaderInvalReadState(XLogReaderState *state)
    1103                 : {
    1104 CBC         661 :     state->seg.ws_segno = 0;
    1105 GIC         661 :     state->segoff = 0;
    1106 CBC         661 :     state->readLen = 0;
    1107             661 : }
    1108 ECB             : 
    1109                 : /*
    1110                 :  * Validate an XLOG record header.
    1111                 :  *
    1112                 :  * This is just a convenience subroutine to avoid duplicated code in
    1113                 :  * XLogReadRecord.  It's not intended for use from anywhere else.
    1114                 :  */
    1115                 : static bool
    1116 GIC     5013218 : ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
    1117                 :                       XLogRecPtr PrevRecPtr, XLogRecord *record,
    1118 ECB             :                       bool randAccess)
    1119                 : {
    1120 GIC     5013218 :     if (record->xl_tot_len < SizeOfXLogRecord)
    1121                 :     {
    1122 CBC         178 :         report_invalid_record(state,
    1123                 :                               "invalid record length at %X/%X: expected at least %u, got %u",
    1124             178 :                               LSN_FORMAT_ARGS(RecPtr),
    1125                 :                               (uint32) SizeOfXLogRecord, record->xl_tot_len);
    1126             178 :         return false;
    1127                 :     }
    1128         5013040 :     if (!RmgrIdIsValid(record->xl_rmid))
    1129                 :     {
    1130 LBC           0 :         report_invalid_record(state,
    1131                 :                               "invalid resource manager ID %u at %X/%X",
    1132 UBC           0 :                               record->xl_rmid, LSN_FORMAT_ARGS(RecPtr));
    1133 UIC           0 :         return false;
    1134 EUB             :     }
    1135 GBC     5013040 :     if (randAccess)
    1136                 :     {
    1137 ECB             :         /*
    1138                 :          * We can't exactly verify the prev-link, but surely it should be less
    1139                 :          * than the record's own address.
    1140                 :          */
    1141 GIC        3655 :         if (!(record->xl_prev < RecPtr))
    1142                 :         {
    1143 LBC           0 :             report_invalid_record(state,
    1144                 :                                   "record with incorrect prev-link %X/%X at %X/%X",
    1145 UBC           0 :                                   LSN_FORMAT_ARGS(record->xl_prev),
    1146 UIC           0 :                                   LSN_FORMAT_ARGS(RecPtr));
    1147 UBC           0 :             return false;
    1148 EUB             :         }
    1149                 :     }
    1150                 :     else
    1151                 :     {
    1152                 :         /*
    1153                 :          * Record's prev-link should exactly match our previous location. This
    1154                 :          * check guards against torn WAL pages where a stale but valid-looking
    1155                 :          * WAL record starts on a sector boundary.
    1156                 :          */
    1157 GIC     5009385 :         if (record->xl_prev != PrevRecPtr)
    1158                 :         {
    1159 CBC           1 :             report_invalid_record(state,
    1160                 :                                   "record with incorrect prev-link %X/%X at %X/%X",
    1161               1 :                                   LSN_FORMAT_ARGS(record->xl_prev),
    1162 GIC           1 :                                   LSN_FORMAT_ARGS(RecPtr));
    1163 CBC           1 :             return false;
    1164 ECB             :         }
    1165                 :     }
    1166                 : 
    1167 GIC     5013039 :     return true;
    1168                 : }
    1169 ECB             : 
    1170                 : 
    1171                 : /*
    1172                 :  * CRC-check an XLOG record.  We do not believe the contents of an XLOG
    1173                 :  * record (other than to the minimal extent of computing the amount of
    1174                 :  * data to read in) until we've checked the CRCs.
    1175                 :  *
    1176                 :  * We assume all of the record (that is, xl_tot_len bytes) has been read
    1177                 :  * into memory at *record.  Also, ValidXLogRecordHeader() has accepted the
    1178                 :  * record's header, which means in particular that xl_tot_len is at least
    1179                 :  * SizeOfXLogRecord.
    1180                 :  */
    1181                 : static bool
    1182 GIC     5012882 : ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
    1183                 : {
    1184 ECB             :     pg_crc32c   crc;
    1185                 : 
    1186                 :     /* Calculate the CRC */
    1187 GIC     5012882 :     INIT_CRC32C(crc);
    1188         5012882 :     COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
    1189 ECB             :     /* include the record header last */
    1190 CBC     5012882 :     COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
    1191 GIC     5012882 :     FIN_CRC32C(crc);
    1192 ECB             : 
    1193 CBC     5012882 :     if (!EQ_CRC32C(record->xl_crc, crc))
    1194                 :     {
    1195 LBC           0 :         report_invalid_record(state,
    1196                 :                               "incorrect resource manager data checksum in record at %X/%X",
    1197 UBC           0 :                               LSN_FORMAT_ARGS(recptr));
    1198 UIC           0 :         return false;
    1199 EUB             :     }
    1200                 : 
    1201 GIC     5012882 :     return true;
    1202                 : }
    1203 ECB             : 
    1204                 : /*
    1205                 :  * Validate a page header.
    1206                 :  *
    1207                 :  * Check if 'phdr' is valid as the header of the XLog page at position
    1208                 :  * 'recptr'.
    1209                 :  */
    1210                 : bool
    1211 GIC      161029 : XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
    1212                 :                              char *phdr)
    1213 ECB             : {
    1214                 :     XLogSegNo   segno;
    1215                 :     int32       offset;
    1216 GIC      161029 :     XLogPageHeader hdr = (XLogPageHeader) phdr;
    1217 ECB             : 
    1218 GIC      161029 :     Assert((recptr % XLOG_BLCKSZ) == 0);
    1219 ECB             : 
    1220 GIC      161029 :     XLByteToSeg(recptr, segno, state->segcxt.ws_segsize);
    1221 CBC      161029 :     offset = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
    1222 ECB             : 
    1223 GIC      161029 :     if (hdr->xlp_magic != XLOG_PAGE_MAGIC)
    1224                 :     {
    1225                 :         char        fname[MAXFNAMELEN];
    1226 ECB             : 
    1227 GIC           3 :         XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
    1228 ECB             : 
    1229 GIC           3 :         report_invalid_record(state,
    1230                 :                               "invalid magic number %04X in WAL segment %s, LSN %X/%X, offset %u",
    1231               3 :                               hdr->xlp_magic,
    1232 ECB             :                               fname,
    1233 GNC           3 :                               LSN_FORMAT_ARGS(recptr),
    1234                 :                               offset);
    1235 CBC           3 :         return false;
    1236                 :     }
    1237                 : 
    1238          161026 :     if ((hdr->xlp_info & ~XLP_ALL_FLAGS) != 0)
    1239                 :     {
    1240                 :         char        fname[MAXFNAMELEN];
    1241                 : 
    1242 UBC           0 :         XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
    1243                 : 
    1244               0 :         report_invalid_record(state,
    1245                 :                               "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
    1246               0 :                               hdr->xlp_info,
    1247                 :                               fname,
    1248 UNC           0 :                               LSN_FORMAT_ARGS(recptr),
    1249 EUB             :                               offset);
    1250 UIC           0 :         return false;
    1251 EUB             :     }
    1252                 : 
    1253 GIC      161026 :     if (hdr->xlp_info & XLP_LONG_HEADER)
    1254 ECB             :     {
    1255 GIC        3056 :         XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
    1256 ECB             : 
    1257 GIC        3056 :         if (state->system_identifier &&
    1258 CBC        1748 :             longhdr->xlp_sysid != state->system_identifier)
    1259 ECB             :         {
    1260 UIC           0 :             report_invalid_record(state,
    1261 EUB             :                                   "WAL file is from different database system: WAL file database system identifier is %llu, pg_control database system identifier is %llu",
    1262 UIC           0 :                                   (unsigned long long) longhdr->xlp_sysid,
    1263 UBC           0 :                                   (unsigned long long) state->system_identifier);
    1264               0 :             return false;
    1265 EUB             :         }
    1266 GIC        3056 :         else if (longhdr->xlp_seg_size != state->segcxt.ws_segsize)
    1267 ECB             :         {
    1268 UIC           0 :             report_invalid_record(state,
    1269 EUB             :                                   "WAL file is from different database system: incorrect segment size in page header");
    1270 UIC           0 :             return false;
    1271 EUB             :         }
    1272 GIC        3056 :         else if (longhdr->xlp_xlog_blcksz != XLOG_BLCKSZ)
    1273 ECB             :         {
    1274 UIC           0 :             report_invalid_record(state,
    1275 EUB             :                                   "WAL file is from different database system: incorrect XLOG_BLCKSZ in page header");
    1276 UIC           0 :             return false;
    1277 EUB             :         }
    1278                 :     }
    1279 GIC      157970 :     else if (offset == 0)
    1280 ECB             :     {
    1281                 :         char        fname[MAXFNAMELEN];
    1282                 : 
    1283 UIC           0 :         XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
    1284 EUB             : 
    1285                 :         /* hmm, first page of file doesn't have a long header? */
    1286 UIC           0 :         report_invalid_record(state,
    1287                 :                               "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
    1288               0 :                               hdr->xlp_info,
    1289 EUB             :                               fname,
    1290 UNC           0 :                               LSN_FORMAT_ARGS(recptr),
    1291                 :                               offset);
    1292 UBC           0 :         return false;
    1293                 :     }
    1294 EUB             : 
    1295                 :     /*
    1296                 :      * Check that the address on the page agrees with what we expected. This
    1297                 :      * check typically fails when an old WAL segment is recycled, and hasn't
    1298                 :      * yet been overwritten with new data.
    1299                 :      */
    1300 GNC      161026 :     if (hdr->xlp_pageaddr != recptr)
    1301                 :     {
    1302 ECB             :         char        fname[MAXFNAMELEN];
    1303                 : 
    1304 GIC           3 :         XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
    1305                 : 
    1306 CBC           3 :         report_invalid_record(state,
    1307                 :                               "unexpected pageaddr %X/%X in WAL segment %s, LSN %X/%X, offset %u",
    1308               3 :                               LSN_FORMAT_ARGS(hdr->xlp_pageaddr),
    1309                 :                               fname,
    1310 GNC           3 :                               LSN_FORMAT_ARGS(recptr),
    1311 ECB             :                               offset);
    1312 GIC           3 :         return false;
    1313 ECB             :     }
    1314                 : 
    1315                 :     /*
    1316                 :      * Since child timelines are always assigned a TLI greater than their
    1317                 :      * immediate parent's TLI, we should never see TLI go backwards across
    1318                 :      * successive pages of a consistent WAL sequence.
    1319                 :      *
    1320                 :      * Sometimes we re-read a segment that's already been (partially) read. So
    1321                 :      * we only verify TLIs for pages that are later than the last remembered
    1322                 :      * LSN.
    1323                 :      */
    1324 GIC      161023 :     if (recptr > state->latestPagePtr)
    1325                 :     {
    1326           87646 :         if (hdr->xlp_tli < state->latestPageTLI)
    1327 ECB             :         {
    1328                 :             char        fname[MAXFNAMELEN];
    1329                 : 
    1330 UIC           0 :             XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
    1331                 : 
    1332               0 :             report_invalid_record(state,
    1333                 :                                   "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%X, offset %u",
    1334                 :                                   hdr->xlp_tli,
    1335 EUB             :                                   state->latestPageTLI,
    1336                 :                                   fname,
    1337 UNC           0 :                                   LSN_FORMAT_ARGS(recptr),
    1338                 :                                   offset);
    1339 UIC           0 :             return false;
    1340                 :         }
    1341 EUB             :     }
    1342 GIC      161023 :     state->latestPagePtr = recptr;
    1343 GBC      161023 :     state->latestPageTLI = hdr->xlp_tli;
    1344                 : 
    1345 GIC      161023 :     return true;
    1346 ECB             : }
    1347                 : 
    1348                 : /*
    1349                 :  * Forget about an error produced by XLogReaderValidatePageHeader().
    1350                 :  */
    1351                 : void
    1352 GIC           6 : XLogReaderResetError(XLogReaderState *state)
    1353                 : {
    1354               6 :     state->errormsg_buf[0] = '\0';
    1355               6 :     state->errormsg_deferred = false;
    1356 CBC           6 : }
    1357                 : 
    1358 ECB             : /*
    1359                 :  * Find the first record with an lsn >= RecPtr.
    1360                 :  *
    1361                 :  * This is different from XLogBeginRead() in that RecPtr doesn't need to point
    1362                 :  * to a valid record boundary.  Useful for checking whether RecPtr is a valid
    1363                 :  * xlog address for reading, and to find the first valid address after some
    1364                 :  * address when dumping records for debugging purposes.
    1365                 :  *
    1366                 :  * This positions the reader, like XLogBeginRead(), so that the next call to
    1367                 :  * XLogReadRecord() will read the next valid record.
    1368                 :  */
    1369                 : XLogRecPtr
    1370 GIC          15 : XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
    1371                 : {
    1372                 :     XLogRecPtr  tmpRecPtr;
    1373              15 :     XLogRecPtr  found = InvalidXLogRecPtr;
    1374 ECB             :     XLogPageHeader header;
    1375                 :     char       *errormsg;
    1376                 : 
    1377 CBC          15 :     Assert(!XLogRecPtrIsInvalid(RecPtr));
    1378                 : 
    1379                 :     /* Make sure ReadPageInternal() can't return XLREAD_WOULDBLOCK. */
    1380 GIC          15 :     state->nonblocking = false;
    1381 ECB             : 
    1382                 :     /*
    1383                 :      * skip over potential continuation data, keeping in mind that it may span
    1384                 :      * multiple pages
    1385                 :      */
    1386 GIC          15 :     tmpRecPtr = RecPtr;
    1387                 :     while (true)
    1388 UIC           0 :     {
    1389                 :         XLogRecPtr  targetPagePtr;
    1390 ECB             :         int         targetRecOff;
    1391                 :         uint32      pageHeaderSize;
    1392 EUB             :         int         readLen;
    1393                 : 
    1394                 :         /*
    1395                 :          * Compute targetRecOff. It should typically be equal to or greater
    1396                 :          * than the short page-header size, since a valid record can't start
    1397                 :          * anywhere before that, except when the caller has explicitly
    1398                 :          * specified an offset that falls somewhere there or when we are
    1399                 :          * skipping a multi-page continuation record. It doesn't matter
    1400                 :          * though, because ReadPageInternal() is prepared to handle that and
    1401                 :          * will read at least a short page-header's worth of data.
    1402                 :          */
    1403 GIC          15 :         targetRecOff = tmpRecPtr % XLOG_BLCKSZ;
    1404                 : 
    1405                 :         /* scroll back to page boundary */
    1406              15 :         targetPagePtr = tmpRecPtr - targetRecOff;
    1407 ECB             : 
    1408                 :         /* Read the page containing the record */
    1409 GIC          15 :         readLen = ReadPageInternal(state, targetPagePtr, targetRecOff);
    1410 CBC          15 :         if (readLen < 0)
    1411 UIC           0 :             goto err;
    1412                 : 
    1413 CBC          15 :         header = (XLogPageHeader) state->readBuf;
    1414 ECB             : 
    1415 GBC          15 :         pageHeaderSize = XLogPageHeaderSize(header);
    1416                 : 
    1417 ECB             :         /* make sure we have enough data for the page header */
    1418 GIC          15 :         readLen = ReadPageInternal(state, targetPagePtr, pageHeaderSize);
    1419 CBC          15 :         if (readLen < 0)
    1420 UIC           0 :             goto err;
    1421                 : 
    1422 ECB             :         /* skip over potential continuation data */
    1423 CBC          15 :         if (header->xlp_info & XLP_FIRST_IS_CONTRECORD)
    1424 EUB             :         {
    1425                 :             /*
    1426                 :              * If the length of the remaining continuation data is more than
    1427 ECB             :              * what can fit in this page, the continuation record crosses over
    1428                 :              * this page. Read the next page and try again. xlp_rem_len in the
    1429                 :              * next page header will contain the remaining length of the
    1430                 :              * continuation data.
    1431                 :              *
    1432                 :              * Note that record headers are MAXALIGN'ed
    1433                 :              */
    1434 GIC          13 :             if (MAXALIGN(header->xlp_rem_len) >= (XLOG_BLCKSZ - pageHeaderSize))
    1435 UIC           0 :                 tmpRecPtr = targetPagePtr + XLOG_BLCKSZ;
    1436                 :             else
    1437                 :             {
    1438 ECB             :                 /*
    1439 EUB             :                  * The previous continuation record ends in this page. Set
    1440                 :                  * tmpRecPtr to point to the first valid record.
    1441                 :                  */
    1442 GIC          13 :                 tmpRecPtr = targetPagePtr + pageHeaderSize
    1443              13 :                     + MAXALIGN(header->xlp_rem_len);
    1444              13 :                 break;
    1445                 :             }
    1446 ECB             :         }
    1447                 :         else
    1448                 :         {
    1449 GIC           2 :             tmpRecPtr = targetPagePtr + pageHeaderSize;
    1450               2 :             break;
    1451                 :         }
    1452                 :     }
    1453 ECB             : 
    1454                 :     /*
    1455                 :      * we know now that tmpRecPtr is an address pointing to a valid XLogRecord
    1456                 :      * because either we're at the first record after the beginning of a page
    1457                 :      * or we just jumped over the remaining data of a continuation.
    1458                 :      */
    1459 GIC          15 :     XLogBeginRead(state, tmpRecPtr);
    1460             612 :     while (XLogReadRecord(state, &errormsg) != NULL)
    1461                 :     {
    1462                 :         /* past the record we've found, break out */
    1463 CBC         612 :         if (RecPtr <= state->ReadRecPtr)
    1464 ECB             :         {
    1465                 :             /* Rewind the reader to the beginning of the last record. */
    1466 GIC          15 :             found = state->ReadRecPtr;
    1467 CBC          15 :             XLogBeginRead(state, found);
    1468 GIC          15 :             return found;
    1469                 :         }
    1470 ECB             :     }
    1471                 : 
    1472 LBC           0 : err:
    1473 UIC           0 :     XLogReaderInvalReadState(state);
    1474                 : 
    1475               0 :     return InvalidXLogRecPtr;
    1476 EUB             : }
    1477                 : 
    1478                 : /*
    1479                 :  * Helper function to ease writing of XLogRoutine->page_read callbacks.
    1480                 :  * If this function is used, caller must supply a segment_open callback in
    1481                 :  * 'state', as that is used here.
    1482                 :  *
    1483                 :  * Read 'count' bytes into 'buf', starting at location 'startptr', from WAL
    1484                 :  * fetched from timeline 'tli'.
    1485                 :  *
    1486                 :  * Returns true if succeeded, false if an error occurs, in which case
    1487                 :  * 'errinfo' receives error details.
    1488                 :  *
    1489                 :  * XXX probably this should be improved to suck data directly from the
    1490                 :  * WAL buffers when possible.
    1491                 :  */
    1492                 : bool
    1493 GIC       61072 : WALRead(XLogReaderState *state,
    1494                 :         char *buf, XLogRecPtr startptr, Size count, TimeLineID tli,
    1495                 :         WALReadError *errinfo)
    1496                 : {
    1497 ECB             :     char       *p;
    1498                 :     XLogRecPtr  recptr;
    1499                 :     Size        nbytes;
    1500                 : 
    1501 GIC       61072 :     p = buf;
    1502           61072 :     recptr = startptr;
    1503           61072 :     nbytes = count;
    1504                 : 
    1505 CBC      122159 :     while (nbytes > 0)
    1506 ECB             :     {
    1507                 :         uint32      startoff;
    1508                 :         int         segbytes;
    1509                 :         int         readbytes;
    1510                 : 
    1511 GIC       61088 :         startoff = XLogSegmentOffset(recptr, state->segcxt.ws_segsize);
    1512                 : 
    1513                 :         /*
    1514                 :          * If the data we want is not in a segment we have open, close what we
    1515 ECB             :          * have (if anything) and open the next one, using the caller's
    1516                 :          * provided segment_open callback.
    1517                 :          */
    1518 GIC       61088 :         if (state->seg.ws_file < 0 ||
    1519           59716 :             !XLByteInSeg(recptr, state->seg.ws_segno, state->segcxt.ws_segsize) ||
    1520           59556 :             tli != state->seg.ws_tli)
    1521                 :         {
    1522 ECB             :             XLogSegNo   nextSegNo;
    1523                 : 
    1524 CBC        1532 :             if (state->seg.ws_file >= 0)
    1525 GIC         160 :                 state->routine.segment_close(state);
    1526                 : 
    1527            1532 :             XLByteToSeg(recptr, nextSegNo, state->segcxt.ws_segsize);
    1528 CBC        1532 :             state->routine.segment_open(state, nextSegNo, &tli);
    1529 ECB             : 
    1530                 :             /* This shouldn't happen -- indicates a bug in segment_open */
    1531 CBC        1531 :             Assert(state->seg.ws_file >= 0);
    1532 ECB             : 
    1533                 :             /* Update the current segment info. */
    1534 GIC        1531 :             state->seg.ws_tli = tli;
    1535 CBC        1531 :             state->seg.ws_segno = nextSegNo;
    1536                 :         }
    1537                 : 
    1538 ECB             :         /* How many bytes are within this segment? */
    1539 CBC       61087 :         if (nbytes > (state->segcxt.ws_segsize - startoff))
    1540 GIC          16 :             segbytes = state->segcxt.ws_segsize - startoff;
    1541                 :         else
    1542           61071 :             segbytes = nbytes;
    1543 ECB             : 
    1544                 : #ifndef FRONTEND
    1545 GIC       61087 :         pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
    1546 ECB             : #endif
    1547                 : 
    1548                 :         /* Reset errno first; eases reporting non-errno-affecting errors */
    1549 CBC       61087 :         errno = 0;
    1550 GIC       61087 :         readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff);
    1551                 : 
    1552                 : #ifndef FRONTEND
    1553 CBC       61087 :         pgstat_report_wait_end();
    1554 ECB             : #endif
    1555                 : 
    1556 GIC       61087 :         if (readbytes <= 0)
    1557 ECB             :         {
    1558 UIC           0 :             errinfo->wre_errno = errno;
    1559               0 :             errinfo->wre_req = segbytes;
    1560 LBC           0 :             errinfo->wre_read = readbytes;
    1561 UIC           0 :             errinfo->wre_off = startoff;
    1562 UBC           0 :             errinfo->wre_seg = state->seg;
    1563               0 :             return false;
    1564 EUB             :         }
    1565                 : 
    1566                 :         /* Update state for read */
    1567 GBC       61087 :         recptr += readbytes;
    1568 GIC       61087 :         nbytes -= readbytes;
    1569           61087 :         p += readbytes;
    1570                 :     }
    1571 ECB             : 
    1572 CBC       61071 :     return true;
    1573 ECB             : }
    1574                 : 
    1575                 : /* ----------------------------------------
    1576                 :  * Functions for decoding the data and block references in a record.
    1577                 :  * ----------------------------------------
    1578                 :  */
    1579                 : 
    1580                 : /*
    1581                 :  * Private function to reset the state, forgetting all decoded records, if we
    1582                 :  * are asked to move to a new read position.
                         :  *
                         :  * Every record still linked from state->decode_queue_head is unlinked from
                         :  * the queue; only records flagged "oversized" are pfree'd here.  (The other
                         :  * records presumably live inside state->decode_buffer, which is rewound
                         :  * below rather than freed -- confirm against the allocation code.)
                         :  *
                         :  * On return the queue head, the queue tail and state->record are NULL, the
                         :  * decode buffer head and tail both point at the start of the buffer, and
                         :  * any pending error message (errormsg_buf / errormsg_deferred) is cleared.
    1583                 :  */
    1584                 : static void
    1585 GIC        3709 : ResetDecoder(XLogReaderState *state)
    1586                 : {
    1587                 :     DecodedXLogRecord *r;
    1588                 : 
    1589 ECB             :     /* Reset the decoded record queue, freeing any oversized records. */
    1590 GIC        8596 :     while ((r = state->decode_queue_head) != NULL)
    1591                 :     {
    1592            1178 :         state->decode_queue_head = r->next;
    1593            1178 :         if (r->oversized)
    1594 LBC           0 :             pfree(r);
    1595                 :     }
    1596 CBC        3709 :     state->decode_queue_tail = NULL;
    1597            3709 :     state->decode_queue_head = NULL;
    1598 GBC        3709 :     state->record = NULL;
    1599                 : 
    1600 ECB             :     /* Reset the decode buffer to empty. */
    1601 CBC        3709 :     state->decode_buffer_tail = state->decode_buffer;
    1602            3709 :     state->decode_buffer_head = state->decode_buffer;
    1603                 : 
    1604                 :     /* Clear error state. */
    1605            3709 :     state->errormsg_buf[0] = '\0';
    1606            3709 :     state->errormsg_deferred = false;
    1607 GIC        3709 : }
    1608                 : 
    1609 ECB             : /*
    1610                 :  * Compute the maximum possible amount of space (including alignment padding)
    1611                 :  * that could be required to decode a record, given xl_tot_len from the
    1612                 :  * record's header.  This is the amount of output buffer space that we need
    1613                 :  * to decode a record, though we might not finish up using it all.
    1614                 :  *
    1615                 :  * This computation is pessimistic and assumes the maximum possible number of
    1616                 :  * blocks, due to lack of better information.
                         :  *
                         :  * The result is the sum of: the fixed part of DecodedXLogRecord, a blocks[]
                         :  * array with XLR_MAX_BLOCK_ID + 1 entries, xl_tot_len bytes of raw data, and
                         :  * up to MAXIMUM_ALIGNOF - 1 bytes of padding before the main data, before
                         :  * each block's data, and once more at the end.
                         :  *
                         :  * No overflow check is performed on these additions.
    1617                 :  */
    1618                 : size_t
    1619 GIC    10039173 : DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
    1620                 : {
    1621        10039173 :     size_t      size = 0;
    1622                 : 
    1623 ECB             :     /* Account for the fixed size part of the decoded record struct. */
    1624 GIC    10039173 :     size += offsetof(DecodedXLogRecord, blocks[0]);
    1625 ECB             :     /* Account for the flexible blocks array of maximum possible size. */
    1626 GIC    10039173 :     size += sizeof(DecodedBkpBlock) * (XLR_MAX_BLOCK_ID + 1);
    1627                 :     /* Account for all the raw main and block data. */
    1628 CBC    10039173 :     size += xl_tot_len;
    1629                 :     /* We might insert padding before main_data. */
    1630        10039173 :     size += (MAXIMUM_ALIGNOF - 1);
    1631                 :     /* We might insert padding before each block's data. */
    1632        10039173 :     size += (MAXIMUM_ALIGNOF - 1) * (XLR_MAX_BLOCK_ID + 1);
    1633                 :     /* We might insert padding at the end. */
    1634        10039173 :     size += (MAXIMUM_ALIGNOF - 1);
    1635                 : 
    1636        10039173 :     return size;
    1637                 : }
    1638 ECB             : 
    1639                 : /*
    1640                 :  * Decode a record.  "decoded" must point to a MAXALIGNed memory area that has
    1641                 :  * space for at least DecodeXLogRecordRequiredSpace(record->xl_tot_len) bytes.
    1642                 :  * On success, decoded->size contains the actual space occupied by the decoded
    1643                 :  * record, which may turn out to be less.
    1644                 :  *
    1645                 :  * Only decoded->oversized member must be initialized already, and will not be
    1646                 :  * modified.  Other members will be initialized as required.
    1647                 :  *
    1648                 :  * On error, a human-readable error message is returned in *errormsg, and
    1649                 :  * the return value is false.
                         :  *
                         :  * Decoding happens in two passes over the bytes that follow the fixed
                         :  * record header:
                         :  *
                         :  * 1. The fragment headers are parsed one block_id at a time.  A short or
                         :  *    long main-data header ends the loop; origin and top-level-xid headers
                         :  *    fill the corresponding decoded fields; any id <= XLR_MAX_BLOCK_ID is a
                         :  *    block reference whose flags, lengths, optional image header, relation
                         :  *    locator and block number are read and cross-checked.  Block ids must
                         :  *    be strictly increasing; skipped ids are marked not in use.  The
                         :  *    payload lengths announced by the headers are summed in "datatotal"
                         :  *    and must equal the number of bytes left once the headers are consumed.
                         :  *
                         :  * 2. The payloads are copied to the area after decoded->blocks[]: for each
                         :  *    in-use block its image (unaligned) and then its data (MAXALIGNed),
                         :  *    followed by the MAXALIGNed main data.
                         :  *
                         :  * *errormsg is written only on failure; it then points at
                         :  * state->errormsg_buf.
    1650                 :  */
    1651                 : bool
    1652 GIC     5012882 : DecodeXLogRecord(XLogReaderState *state,
    1653                 :                  DecodedXLogRecord *decoded,
    1654                 :                  XLogRecord *record,
    1655                 :                  XLogRecPtr lsn,
    1656 ECB             :                  char **errormsg)
    1657                 : {
    1658                 :     /*
    1659                 :      * read next _size bytes from record buffer, but check for overrun first.
    1660                 :      */
    1661                 : #define COPY_HEADER_FIELD(_dst, _size)          \
    1662                 :     do {                                        \
    1663                 :         if (remaining < _size)                   \
    1664                 :             goto shortdata_err;                 \
    1665                 :         memcpy(_dst, ptr, _size);               \
    1666                 :         ptr += _size;                           \
    1667                 :         remaining -= _size;                     \
    1668                 :     } while(0)
    1669                 : 
    1670                 :     char       *ptr;
    1671                 :     char       *out;
    1672                 :     uint32      remaining;
    1673                 :     uint32      datatotal;
    1674 GNC     5012882 :     RelFileLocator *rlocator = NULL;
    1675                 :     uint8       block_id;
    1676                 : 
    1677 GIC     5012882 :     decoded->header = *record;
    1678 CBC     5012882 :     decoded->lsn = lsn;
    1679 GIC     5012882 :     decoded->next = NULL;
    1680         5012882 :     decoded->record_origin = InvalidRepOriginId;
    1681 CBC     5012882 :     decoded->toplevel_xid = InvalidTransactionId;
    1682         5012882 :     decoded->main_data = NULL;
    1683         5012882 :     decoded->main_data_len = 0;
    1684         5012882 :     decoded->max_block_id = -1;
    1685         5012882 :     ptr = (char *) record;
    1686         5012882 :     ptr += SizeOfXLogRecord;
    1687         5012882 :     remaining = record->xl_tot_len - SizeOfXLogRecord;
    1688 ECB             : 
    1689                 :     /* Decode the headers */
    1690 CBC     5012882 :     datatotal = 0;
    1691        10315759 :     while (remaining > datatotal)
    1692                 :     {
    1693 GIC    10284929 :         COPY_HEADER_FIELD(&block_id, sizeof(uint8));
    1694 ECB             : 
    1695 CBC    10284929 :         if (block_id == XLR_BLOCK_ID_DATA_SHORT)
    1696                 :         {
    1697 ECB             :             /* XLogRecordDataHeaderShort */
    1698                 :             uint8       main_data_len;
    1699                 : 
    1700 GIC     4973683 :             COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
    1701                 : 
    1702         4973683 :             decoded->main_data_len = main_data_len;
    1703         4973683 :             datatotal += main_data_len;
    1704 CBC     4973683 :             break;              /* by convention, the main data fragment is
    1705                 :                                  * always last */
    1706 ECB             :         }
    1707 CBC     5311246 :         else if (block_id == XLR_BLOCK_ID_DATA_LONG)
    1708 ECB             :         {
    1709                 :             /* XLogRecordDataHeaderLong */
    1710                 :             uint32      main_data_len;
    1711                 : 
    1712 GIC        8369 :             COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
    1713            8369 :             decoded->main_data_len = main_data_len;
    1714            8369 :             datatotal += main_data_len;
    1715            8369 :             break;              /* by convention, the main data fragment is
    1716 ECB             :                                  * always last */
    1717                 :         }
    1718 CBC     5302877 :         else if (block_id == XLR_BLOCK_ID_ORIGIN)
    1719 ECB             :         {
    1720 GIC       12899 :             COPY_HEADER_FIELD(&decoded->record_origin, sizeof(RepOriginId));
    1721                 :         }
    1722 CBC     5289978 :         else if (block_id == XLR_BLOCK_ID_TOPLEVEL_XID)
    1723                 :         {
    1724             677 :             COPY_HEADER_FIELD(&decoded->toplevel_xid, sizeof(TransactionId));
    1725                 :         }
    1726         5289301 :         else if (block_id <= XLR_MAX_BLOCK_ID)
    1727                 :         {
    1728 ECB             :             /* XLogRecordBlockHeader */
    1729                 :             DecodedBkpBlock *blk;
    1730                 :             uint8       fork_flags;
    1731                 : 
    1732                 :             /* mark any intervening block IDs as not in use */
    1733 GIC     5291658 :             for (int i = decoded->max_block_id + 1; i < block_id; ++i)
    1734            2357 :                 decoded->blocks[i].in_use = false;
    1735                 : 
    1736         5289301 :             if (block_id <= decoded->max_block_id)
    1737 ECB             :             {
    1738 LBC           0 :                 report_invalid_record(state,
    1739                 :                                       "out-of-order block_id %u at %X/%X",
    1740 ECB             :                                       block_id,
    1741 UIC           0 :                                       LSN_FORMAT_ARGS(state->ReadRecPtr));
    1742 UBC           0 :                 goto err;
    1743                 :             }
    1744 GIC     5289301 :             decoded->max_block_id = block_id;
    1745 EUB             : 
    1746 GBC     5289301 :             blk = &decoded->blocks[block_id];
    1747 GIC     5289301 :             blk->in_use = true;
    1748 CBC     5289301 :             blk->apply_image = false;
    1749                 : 
    1750         5289301 :             COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
    1751         5289301 :             blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
    1752         5289301 :             blk->flags = fork_flags;
    1753 GIC     5289301 :             blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
    1754 CBC     5289301 :             blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
    1755 ECB             : 
    1756 CBC     5289301 :             blk->prefetch_buffer = InvalidBuffer;
    1757 ECB             : 
    1758 CBC     5289301 :             COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
    1759                 :             /* cross-check that the HAS_DATA flag is set iff data_length > 0 */
    1760         5289301 :             if (blk->has_data && blk->data_len == 0)
    1761                 :             {
    1762 LBC           0 :                 report_invalid_record(state,
    1763                 :                                       "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
    1764               0 :                                       LSN_FORMAT_ARGS(state->ReadRecPtr));
    1765 UIC           0 :                 goto err;
    1766 EUB             :             }
    1767 GIC     5289301 :             if (!blk->has_data && blk->data_len != 0)
    1768 EUB             :             {
    1769 UBC           0 :                 report_invalid_record(state,
    1770                 :                                       "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
    1771 LBC           0 :                                       (unsigned int) blk->data_len,
    1772 UIC           0 :                                       LSN_FORMAT_ARGS(state->ReadRecPtr));
    1773 UBC           0 :                 goto err;
    1774                 :             }
    1775 GBC     5289301 :             datatotal += blk->data_len;
    1776 EUB             : 
    1777 GBC     5289301 :             if (blk->has_image)
    1778                 :             {
    1779 CBC       44761 :                 COPY_HEADER_FIELD(&blk->bimg_len, sizeof(uint16));
    1780 GIC       44761 :                 COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
    1781 CBC       44761 :                 COPY_HEADER_FIELD(&blk->bimg_info, sizeof(uint8));
    1782                 : 
    1783           44761 :                 blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0);
    1784 ECB             : 
    1785 CBC       44761 :                 if (BKPIMAGE_COMPRESSED(blk->bimg_info))
    1786                 :                 {
    1787 LBC           0 :                     if (blk->bimg_info & BKPIMAGE_HAS_HOLE)
    1788 UIC           0 :                         COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
    1789 ECB             :                     else
    1790 UIC           0 :                         blk->hole_length = 0;
    1791 EUB             :                 }
    1792                 :                 else
    1793 GIC       44761 :                     blk->hole_length = BLCKSZ - blk->bimg_len;
    1794 GBC       44761 :                 datatotal += blk->bimg_len;
    1795                 : 
    1796                 :                 /*
    1797 ECB             :                  * cross-check that hole_offset > 0, hole_length > 0 and
    1798                 :                  * bimg_len < BLCKSZ if the HAS_HOLE flag is set.
    1799                 :                  */
    1800 GIC       44761 :                 if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
    1801           41467 :                     (blk->hole_offset == 0 ||
    1802           41467 :                      blk->hole_length == 0 ||
    1803           41467 :                      blk->bimg_len == BLCKSZ))
    1804 ECB             :                 {
    1805 LBC           0 :                     report_invalid_record(state,
    1806 ECB             :                                           "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
    1807 LBC           0 :                                           (unsigned int) blk->hole_offset,
    1808 UIC           0 :                                           (unsigned int) blk->hole_length,
    1809 UBC           0 :                                           (unsigned int) blk->bimg_len,
    1810 UIC           0 :                                           LSN_FORMAT_ARGS(state->ReadRecPtr));
    1811 UBC           0 :                     goto err;
    1812 EUB             :                 }
    1813                 : 
    1814                 :                 /*
    1815                 :                  * cross-check that hole_offset == 0 and hole_length == 0 if
    1816                 :                  * the HAS_HOLE flag is not set.
    1817                 :                  */
    1818 GIC       44761 :                 if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
    1819            3294 :                     (blk->hole_offset != 0 || blk->hole_length != 0))
    1820                 :                 {
    1821 UIC           0 :                     report_invalid_record(state,
    1822 ECB             :                                           "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
    1823 LBC           0 :                                           (unsigned int) blk->hole_offset,
    1824 UIC           0 :                                           (unsigned int) blk->hole_length,
    1825 UBC           0 :                                           LSN_FORMAT_ARGS(state->ReadRecPtr));
    1826 UIC           0 :                     goto err;
    1827 EUB             :                 }
    1828                 : 
    1829                 :                 /*
    1830                 :                  * Cross-check that bimg_len < BLCKSZ if it is compressed.
    1831                 :                  */
    1832 GIC       44761 :                 if (BKPIMAGE_COMPRESSED(blk->bimg_info) &&
    1833 UIC           0 :                     blk->bimg_len == BLCKSZ)
    1834                 :                 {
    1835               0 :                     report_invalid_record(state,
    1836 ECB             :                                           "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X",
    1837 UBC           0 :                                           (unsigned int) blk->bimg_len,
    1838 UIC           0 :                                           LSN_FORMAT_ARGS(state->ReadRecPtr));
    1839 UBC           0 :                     goto err;
    1840                 :                 }
    1841 EUB             : 
    1842                 :                 /*
    1843                 :                  * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE is
    1844                 :                  * set nor COMPRESSED().  The message reports the offending
                         :                  * image length, i.e. bimg_len (not the block data length).
    1845                 :                  */
    1846 GIC       44761 :                 if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
    1847            3294 :                     !BKPIMAGE_COMPRESSED(blk->bimg_info) &&
    1848            3294 :                     blk->bimg_len != BLCKSZ)
    1849                 :                 {
    1850 LBC           0 :                     report_invalid_record(state,
    1851 ECB             :                                           "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X",
    1852 UIC           0 :                                           (unsigned int) blk->bimg_len,
    1853 UIC           0 :                                           LSN_FORMAT_ARGS(state->ReadRecPtr));
    1854 UBC           0 :                     goto err;
    1855                 :                 }
    1856 EUB             :             }
    1857 GBC     5289301 :             if (!(fork_flags & BKPBLOCK_SAME_REL))
    1858 EUB             :             {
    1859 GNC     4919523 :                 COPY_HEADER_FIELD(&blk->rlocator, sizeof(RelFileLocator));
    1860         4919523 :                 rlocator = &blk->rlocator;
    1861 ECB             :             }
    1862                 :             else
    1863                 :             {
    1864 GNC      369778 :                 if (rlocator == NULL)
    1865                 :                 {
    1866 UIC           0 :                     report_invalid_record(state,
    1867                 :                                           "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
    1868 LBC           0 :                                           LSN_FORMAT_ARGS(state->ReadRecPtr));
    1869 UIC           0 :                     goto err;
    1870 EUB             :                 }
    1871                 : 
    1872 GNC      369778 :                 blk->rlocator = *rlocator;
    1873 EUB             :             }
    1874 GIC     5289301 :             COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
    1875                 :         }
    1876 ECB             :         else
    1877                 :         {
    1878 LBC           0 :             report_invalid_record(state,
    1879                 :                                   "invalid block_id %u at %X/%X",
    1880 UIC           0 :                                   block_id, LSN_FORMAT_ARGS(state->ReadRecPtr));
    1881               0 :             goto err;
    1882 EUB             :         }
    1883                 :     }
    1884                 : 
    1885 GBC     5012882 :     if (remaining != datatotal)
    1886 UIC           0 :         goto shortdata_err;
    1887                 : 
    1888                 :     /*
    1889 ECB             :      * Ok, we've parsed the fragment headers, and verified that the total
    1890 EUB             :      * length of the payload in the fragments is equal to the amount of data
    1891                 :      * left.  Copy the data of each fragment to contiguous space after the
    1892                 :      * blocks array, inserting alignment padding before the data fragments so
    1893                 :      * they can be cast to struct pointers by REDO routines.
    1894                 :      */
    1895 GIC     5012882 :     out = ((char *) decoded) +
    1896         5012882 :         offsetof(DecodedXLogRecord, blocks) +
    1897         5012882 :         sizeof(decoded->blocks[0]) * (decoded->max_block_id + 1);
    1898                 : 
    1899 ECB             :     /* block data first */
    1900 CBC    10304540 :     for (block_id = 0; block_id <= decoded->max_block_id; block_id++)
    1901 ECB             :     {
    1902 GIC     5291658 :         DecodedBkpBlock *blk = &decoded->blocks[block_id];
    1903                 : 
    1904 CBC     5291658 :         if (!blk->in_use)
    1905 GIC        2357 :             continue;
    1906 ECB             : 
    1907 GIC     5289301 :         Assert(blk->has_image || !blk->apply_image);
    1908 ECB             : 
    1909 CBC     5289301 :         if (blk->has_image)
    1910                 :         {
    1911 ECB             :             /* no need to align image */
    1912 GIC       44761 :             blk->bkp_image = out;
    1913 CBC       44761 :             memcpy(out, ptr, blk->bimg_len);
    1914 GIC       44761 :             ptr += blk->bimg_len;
    1915           44761 :             out += blk->bimg_len;
    1916 ECB             :         }
    1917 CBC     5289301 :         if (blk->has_data)
    1918 ECB             :         {
    1919 CBC     4027016 :             out = (char *) MAXALIGN(out);
    1920 GIC     4027016 :             blk->data = out;
    1921 CBC     4027016 :             memcpy(blk->data, ptr, blk->data_len);
    1922 GIC     4027016 :             ptr += blk->data_len;
    1923 CBC     4027016 :             out += blk->data_len;
    1924 ECB             :         }
    1925                 :     }
    1926                 : 
    1927                 :     /* and finally, the main data */
    1928 GIC     5012882 :     if (decoded->main_data_len > 0)
    1929                 :     {
    1930         4982052 :         out = (char *) MAXALIGN(out);
    1931         4982052 :         decoded->main_data = out;
    1932 CBC     4982052 :         memcpy(decoded->main_data, ptr, decoded->main_data_len);
    1933 GIC     4982052 :         ptr += decoded->main_data_len;
    1934 CBC     4982052 :         out += decoded->main_data_len;
    1935 ECB             :     }
    1936                 : 
    1937                 :     /* Report the actual size we used. */
    1938 CBC     5012882 :     decoded->size = MAXALIGN(out - (char *) decoded);
    1939 GIC     5012882 :     Assert(DecodeXLogRecordRequiredSpace(record->xl_tot_len) >=
    1940                 :            decoded->size);
    1941                 : 
    1942 CBC     5012882 :     return true;
    1943 ECB             : 
    1944 UIC           0 : shortdata_err:
    1945               0 :     report_invalid_record(state,
    1946 ECB             :                           "record with invalid length at %X/%X",
    1947 UIC           0 :                           LSN_FORMAT_ARGS(state->ReadRecPtr));
    1948 UBC           0 : err:
    1949               0 :     *errormsg = state->errormsg_buf;
    1950                 : 
    1951               0 :     return false;
    1952 EUB             : }
    1953                 : 
    1954                 : /*
    1955                 :  * Returns information about the block that a block reference refers to.
    1956                 :  *
    1957                 :  * This is like XLogRecGetBlockTagExtended, except that the block reference
    1958                 :  * must exist and there's no access to prefetch_buffer.
                         :  *
                         :  * The work is delegated to XLogRecGetBlockTagExtended with a NULL
                         :  * prefetch_buffer.  If that call returns false, the failure is not reported
                         :  * to the caller: backend builds raise elog(ERROR) and frontend builds call
                         :  * pg_fatal, both naming the offending block ID.
    1959                 :  */
    1960                 : void
    1961 GIC     3310125 : XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
    1962                 :                    RelFileLocator *rlocator, ForkNumber *forknum,
    1963                 :                    BlockNumber *blknum)
    1964                 : {
    1965 GNC     3310125 :     if (!XLogRecGetBlockTagExtended(record, block_id, rlocator, forknum,
    1966                 :                                     blknum, NULL))
    1967                 :     {
    1968                 : #ifndef FRONTEND
    1969 UIC           0 :         elog(ERROR, "could not locate backup block with ID %d in WAL record",
    1970 ECB             :              block_id);
    1971                 : #else
    1972                 :         pg_fatal("could not locate backup block with ID %d in WAL record",
    1973                 :                  block_id);
    1974 EUB             : #endif
    1975                 :     }
    1976 GIC     3310125 : }
    1977                 : 
    1978                 : /*
    1979                 :  * Returns information about the block that a block reference refers to,
    1980                 :  * optionally including the buffer that the block may already be in.
    1981 ECB             :  *
    1982                 :  * If the WAL record contains a block reference with the given ID, *rlocator,
    1983                 :  * *forknum, *blknum and *prefetch_buffer are filled in (if not NULL), and
    1984                 :  * returns true.  Otherwise returns false.
                         :  *
                         :  * Whether the reference exists is decided by XLogRecHasBlockRef().  Each
                         :  * output pointer is optional and is checked independently, so callers may
                         :  * pass NULL for any value they do not need.  When false is returned none of
                         :  * the output locations has been written.  The values are copied from
                         :  * record->record->blocks[block_id].
    1985                 :  */
    1986                 : bool
    1987 GIC     6089138 : XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id,
    1988                 :                            RelFileLocator *rlocator, ForkNumber *forknum,
    1989                 :                            BlockNumber *blknum,
    1990                 :                            Buffer *prefetch_buffer)
    1991                 : {
    1992 ECB             :     DecodedBkpBlock *bkpb;
    1993                 : 
    1994 GIC     6089138 :     if (!XLogRecHasBlockRef(record, block_id))
    1995           33965 :         return false;
    1996                 : 
    1997         6055173 :     bkpb = &record->record->blocks[block_id];
    1998 GNC     6055173 :     if (rlocator)
    1999         5996547 :         *rlocator = bkpb->rlocator;
    2000 CBC     6055173 :     if (forknum)
    2001 GIC     2691196 :         *forknum = bkpb->forknum;
    2002 CBC     6055173 :     if (blknum)
    2003         4369662 :         *blknum = bkpb->blkno;
    2004         6055173 :     if (prefetch_buffer)
    2005         2654768 :         *prefetch_buffer = bkpb->prefetch_buffer;
    2006         6055173 :     return true;
    2007 ECB             : }
    2008                 : 
    2009                 : /*
    2010                 :  * Returns the data associated with a block reference, or NULL if there is
    2011                 :  * no data (e.g. because a full-page image was taken instead). The returned
    2012                 :  * pointer points to a MAXALIGNed buffer.
                         :  *
                         :  * "len" is optional.  When it is not NULL it receives the block's data_len
                         :  * if data is present, or 0 if the block reference exists but has no data.
                         :  *
                         :  * NULL is also returned when block_id is greater than the record's
                         :  * max_block_id or the block slot is not in use.  NOTE: on that path *len is
                         :  * left untouched, so callers must not read *len after a NULL result unless
                         :  * they know the block reference exists.
    2013                 :  */
    2014                 : char *
    2015 GIC     3436025 : XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
    2016                 : {
    2017                 :     DecodedBkpBlock *bkpb;
    2018                 : 
    2019         3436025 :     if (block_id > record->record->max_block_id ||
    2020 CBC     3436025 :         !record->record->blocks[block_id].in_use)
    2021 UIC           0 :         return NULL;
    2022                 : 
    2023 GIC     3436025 :     bkpb = &record->record->blocks[block_id];
    2024 ECB             : 
    2025 CBC     3436025 :     if (!bkpb->has_data)
    2026 EUB             :     {
    2027 GIC         224 :         if (len)
    2028 CBC         224 :             *len = 0;
    2029 GIC         224 :         return NULL;
    2030 ECB             :     }
    2031                 :     else
    2032                 :     {
    2033 CBC     3435801 :         if (len)
    2034         3432270 :             *len = bkpb->data_len;
    2035 GIC     3435801 :         return bkpb->data;
    2036                 :     }
    2037                 : }
    2038 ECB             : 
    2039                 : /*
    2040                 :  * Restore a full-page image from a backup block attached to an XLOG record.
    2041                 :  *
    2042                 :  * Returns true if a full-page image is restored, and false on failure with
    2043                 :  * an error to be consumed by the caller.
    2044                 :  */
    2045                 : bool
    2046 GIC       36508 : RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
    2047                 : {
    2048                 :     DecodedBkpBlock *bkpb;
    2049                 :     char       *ptr;
    2050                 :     PGAlignedBlock tmp;
    2051 ECB             : 
    2052 GIC       36508 :     if (block_id > record->record->max_block_id ||
    2053           36508 :         !record->record->blocks[block_id].in_use)
    2054                 :     {
    2055 UIC           0 :         report_invalid_record(record,
    2056                 :                               "could not restore image at %X/%X with invalid block %d specified",
    2057 LBC           0 :                               LSN_FORMAT_ARGS(record->ReadRecPtr),
    2058 ECB             :                               block_id);
    2059 UIC           0 :         return false;
    2060 EUB             :     }
    2061 GIC       36508 :     if (!record->record->blocks[block_id].has_image)
    2062 EUB             :     {
    2063 UIC           0 :         report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d",
    2064 UBC           0 :                               LSN_FORMAT_ARGS(record->ReadRecPtr),
    2065                 :                               block_id);
    2066 LBC           0 :         return false;
    2067                 :     }
    2068 EUB             : 
    2069 GBC       36508 :     bkpb = &record->record->blocks[block_id];
    2070 GIC       36508 :     ptr = bkpb->bkp_image;
    2071 EUB             : 
    2072 GIC       36508 :     if (BKPIMAGE_COMPRESSED(bkpb->bimg_info))
    2073                 :     {
    2074 ECB             :         /* If a backup block image is compressed, decompress it */
    2075 LBC           0 :         bool        decomp_success = true;
    2076                 : 
    2077               0 :         if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
    2078                 :         {
    2079 UIC           0 :             if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
    2080 UBC           0 :                                 BLCKSZ - bkpb->hole_length, true) < 0)
    2081 UIC           0 :                 decomp_success = false;
    2082 EUB             :         }
    2083 UIC           0 :         else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
    2084 EUB             :         {
    2085                 : #ifdef USE_LZ4
    2086 UBC           0 :             if (LZ4_decompress_safe(ptr, tmp.data,
    2087 UIC           0 :                                     bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
    2088 UBC           0 :                 decomp_success = false;
    2089                 : #else
    2090                 :             report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
    2091 EUB             :                                   LSN_FORMAT_ARGS(record->ReadRecPtr),
    2092                 :                                   "LZ4",
    2093                 :                                   block_id);
    2094                 :             return false;
    2095                 : #endif
    2096                 :         }
    2097 UIC           0 :         else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_ZSTD) != 0)
    2098                 :         {
    2099                 : #ifdef USE_ZSTD
    2100               0 :             size_t      decomp_result = ZSTD_decompress(tmp.data,
    2101               0 :                                                         BLCKSZ - bkpb->hole_length,
    2102 UBC           0 :                                                         ptr, bkpb->bimg_len);
    2103                 : 
    2104 UIC           0 :             if (ZSTD_isError(decomp_result))
    2105 UBC           0 :                 decomp_success = false;
    2106 EUB             : #else
    2107                 :             report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
    2108                 :                                   LSN_FORMAT_ARGS(record->ReadRecPtr),
    2109                 :                                   "zstd",
    2110                 :                                   block_id);
    2111                 :             return false;
    2112                 : #endif
    2113                 :         }
    2114                 :         else
    2115                 :         {
    2116 UIC           0 :             report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d",
    2117               0 :                                   LSN_FORMAT_ARGS(record->ReadRecPtr),
    2118                 :                                   block_id);
    2119               0 :             return false;
    2120                 :         }
    2121 EUB             : 
    2122 UBC           0 :         if (!decomp_success)
    2123                 :         {
    2124               0 :             report_invalid_record(record, "could not decompress image at %X/%X, block %d",
    2125 UIC           0 :                                   LSN_FORMAT_ARGS(record->ReadRecPtr),
    2126                 :                                   block_id);
    2127 UBC           0 :             return false;
    2128                 :         }
    2129 EUB             : 
    2130 UBC           0 :         ptr = tmp.data;
    2131                 :     }
    2132 EUB             : 
    2133                 :     /* generate page, taking into account hole if necessary */
    2134 GIC       36508 :     if (bkpb->hole_length == 0)
    2135 EUB             :     {
    2136 GIC        1510 :         memcpy(page, ptr, BLCKSZ);
    2137                 :     }
    2138                 :     else
    2139 ECB             :     {
    2140 GIC       34998 :         memcpy(page, ptr, bkpb->hole_offset);
    2141 ECB             :         /* must zero-fill the hole */
    2142 GIC      258577 :         MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
    2143           34998 :         memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
    2144           34998 :                ptr + bkpb->hole_offset,
    2145 CBC       34998 :                BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
    2146                 :     }
    2147 ECB             : 
    2148 CBC       36508 :     return true;
    2149 ECB             : }
    2150                 : 
    2151                 : #ifndef FRONTEND
    2152                 : 
    2153                 : /*
    2154                 :  * Extract the FullTransactionId from a WAL record.
    2155                 :  */
    2156                 : FullTransactionId
    2157 UIC           0 : XLogRecGetFullXid(XLogReaderState *record)
    2158                 : {
    2159                 :     TransactionId xid,
    2160                 :                 next_xid;
    2161                 :     uint32      epoch;
    2162 EUB             : 
    2163                 :     /*
    2164                 :      * This function is only safe during replay, because it depends on the
    2165                 :      * replay state.  See AdvanceNextFullTransactionIdPastXid() for more.
    2166                 :      */
    2167 UIC           0 :     Assert(AmStartupProcess() || !IsUnderPostmaster);
    2168                 : 
    2169               0 :     xid = XLogRecGetXid(record);
    2170               0 :     next_xid = XidFromFullTransactionId(ShmemVariableCache->nextXid);
    2171               0 :     epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid);
    2172 EUB             : 
    2173                 :     /*
    2174                 :      * If xid is numerically greater than next_xid, it has to be from the last
    2175                 :      * epoch.
    2176                 :      */
    2177 UIC           0 :     if (unlikely(xid > next_xid))
    2178               0 :         --epoch;
    2179                 : 
    2180               0 :     return FullTransactionIdFromEpochAndXid(epoch, xid);
    2181                 : }
    2182 EUB             : 
    2183                 : #endif
        

Generated by: LCOV version v1.16-55-g56c0a2a