Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * xlogrecovery.c
4 : : * Functions for WAL recovery, standby mode
5 : : *
6 : : * This source file contains functions controlling WAL recovery.
7 : : * InitWalRecovery() initializes the system for crash or archive recovery,
8 : : * or standby mode, depending on configuration options and the state of
9 : : * the control file and possible backup label file. PerformWalRecovery()
10 : : * performs the actual WAL replay, calling the rmgr-specific redo routines.
11 : : * FinishWalRecovery() performs end-of-recovery checks and cleanup actions,
12 : : * and prepares information needed to initialize the WAL for writes. In
13 : : * addition to these three main functions, there are a bunch of functions
14 : : * for interrogating recovery state and controlling the recovery process.
15 : : *
16 : : *
17 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
18 : : * Portions Copyright (c) 1994, Regents of the University of California
19 : : *
20 : : * src/backend/access/transam/xlogrecovery.c
21 : : *
22 : : *-------------------------------------------------------------------------
23 : : */
24 : :
25 : : #include "postgres.h"
26 : :
27 : : #include <ctype.h>
28 : : #include <math.h>
29 : : #include <time.h>
30 : : #include <sys/stat.h>
31 : : #include <sys/time.h>
32 : : #include <unistd.h>
33 : :
34 : : #include "access/timeline.h"
35 : : #include "access/transam.h"
36 : : #include "access/xact.h"
37 : : #include "access/xlog_internal.h"
38 : : #include "access/xlogarchive.h"
39 : : #include "access/xlogprefetcher.h"
40 : : #include "access/xlogreader.h"
41 : : #include "access/xlogrecovery.h"
42 : : #include "access/xlogutils.h"
43 : : #include "backup/basebackup.h"
44 : : #include "catalog/pg_control.h"
45 : : #include "commands/tablespace.h"
46 : : #include "common/file_utils.h"
47 : : #include "miscadmin.h"
48 : : #include "pgstat.h"
49 : : #include "postmaster/bgwriter.h"
50 : : #include "postmaster/startup.h"
51 : : #include "replication/slot.h"
52 : : #include "replication/slotsync.h"
53 : : #include "replication/walreceiver.h"
54 : : #include "storage/fd.h"
55 : : #include "storage/ipc.h"
56 : : #include "storage/latch.h"
57 : : #include "storage/pmsignal.h"
58 : : #include "storage/procarray.h"
59 : : #include "storage/spin.h"
60 : : #include "utils/datetime.h"
61 : : #include "utils/fmgrprotos.h"
62 : : #include "utils/guc_hooks.h"
63 : : #include "utils/pg_lsn.h"
64 : : #include "utils/ps_status.h"
65 : : #include "utils/pg_rusage.h"
66 : :
67 : : /* Unsupported old recovery command file names (relative to $PGDATA) */
68 : : #define RECOVERY_COMMAND_FILE "recovery.conf"
69 : : #define RECOVERY_COMMAND_DONE "recovery.done"
70 : :
71 : : /*
72 : : * GUC support: table mapping the option names pause, promote and shutdown to their RECOVERY_TARGET_ACTION_* values. The entry with a NULL name ends the table.
73 : : */
74 : : const struct config_enum_entry recovery_target_action_options[] = {
75 : : {"pause", RECOVERY_TARGET_ACTION_PAUSE, false},
76 : : {"promote", RECOVERY_TARGET_ACTION_PROMOTE, false},
77 : : {"shutdown", RECOVERY_TARGET_ACTION_SHUTDOWN, false},
78 : : {NULL, 0, false}
79 : : };
80 : :
81 : : /* options formerly taken from recovery.conf for archive recovery */
82 : : char *recoveryRestoreCommand = NULL;
83 : : char *recoveryEndCommand = NULL;
84 : : char *archiveCleanupCommand = NULL;
85 : : RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
86 : : bool recoveryTargetInclusive = true;
87 : : int recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE;
88 : : TransactionId recoveryTargetXid;
89 : : char *recovery_target_time_string;
90 : : TimestampTz recoveryTargetTime;
91 : : const char *recoveryTargetName;
92 : : XLogRecPtr recoveryTargetLSN;
93 : : int recovery_min_apply_delay = 0;
94 : :
95 : : /* options formerly taken from recovery.conf for XLOG streaming */
96 : : char *PrimaryConnInfo = NULL;
97 : : char *PrimarySlotName = NULL;
98 : : bool wal_receiver_create_temp_slot = false;
99 : :
100 : : /*
101 : : * recoveryTargetTimeLineGoal: what the user requested, if any
102 : : *
103 : : * recoveryTargetTLIRequested: numeric value of requested timeline, if constant
104 : : *
105 : : * recoveryTargetTLI: the currently understood target timeline; this can change during recovery
106 : : *
107 : : * expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and
108 : : * the timelines of its known parents, newest first (so recoveryTargetTLI is
109 : : * always the first list member). Only these TLIs are expected to be seen in
110 : : * the WAL segments we read, and indeed only these TLIs will be considered as
111 : : * candidate WAL files to open at all.
112 : : *
113 : : * curFileTLI: the TLI appearing in the name of the current input WAL file.
114 : : * (This is not necessarily the same as the timeline from which we are
115 : : * replaying WAL, which StartupXLOG calls replayTLI, because we could be
116 : : * scanning data that was copied from an ancestor timeline when the current
117 : : * file was created.) During a sequential scan we do not allow this value
118 : : * to decrease.
119 : : */
120 : : RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal = RECOVERY_TARGET_TIMELINE_LATEST;
121 : : TimeLineID recoveryTargetTLIRequested = 0;
122 : : TimeLineID recoveryTargetTLI = 0;
123 : : static List *expectedTLEs;
124 : : static TimeLineID curFileTLI;
125 : :
126 : : /*
127 : : * When ArchiveRecoveryRequested is set, archive recovery was requested,
128 : : * ie. signal files were present. When InArchiveRecovery is set, we are
129 : : * currently recovering using offline XLOG archives. These variables are only
130 : : * valid in the startup process.
131 : : *
132 : : * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
133 : : * currently performing crash recovery using only XLOG files in pg_wal, but
134 : : * will switch to using offline XLOG archives as soon as we reach the end of
135 : : * WAL in pg_wal.
136 : : */
137 : : bool ArchiveRecoveryRequested = false;
138 : : bool InArchiveRecovery = false;
139 : :
140 : : /*
141 : : * When StandbyModeRequested is set, standby mode was requested, i.e.
142 : : * standby.signal file was present. When StandbyMode is set, we are currently
143 : : * in standby mode. These variables are only valid in the startup process.
144 : : * They work similarly to ArchiveRecoveryRequested and InArchiveRecovery.
145 : : */
146 : : static bool StandbyModeRequested = false;
147 : : bool StandbyMode = false;
148 : :
149 : : /* was a signal file present at startup? */
150 : : static bool standby_signal_file_found = false;
151 : : static bool recovery_signal_file_found = false;
152 : :
153 : : /*
154 : : * CheckPointLoc is the position of the checkpoint record that determines
155 : : * where to start the replay. It comes from the backup label file or the
156 : : * control file.
157 : : *
158 : : * RedoStartLSN is the checkpoint's REDO location, also from the backup label
159 : : * file or the control file. In standby mode, XLOG streaming usually starts
160 : : * from the position where an invalid record was found. But if we fail to
161 : : * read even the initial checkpoint record, we use the REDO location instead
162 : : * of the checkpoint location as the start position of XLOG streaming.
163 : : * Otherwise we would have to jump backwards to the REDO location after
164 : : * reading the checkpoint record, because the REDO record can precede the
165 : : * checkpoint record.
166 : : */
167 : : static XLogRecPtr CheckPointLoc = InvalidXLogRecPtr;
168 : : static TimeLineID CheckPointTLI = 0;
169 : : static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
170 : : static TimeLineID RedoStartTLI = 0;
171 : :
172 : : /*
173 : : * Local copy of SharedHotStandbyActive variable. False actually means "not
174 : : * known, need to check the shared state".
175 : : */
176 : : static bool LocalHotStandbyActive = false;
177 : :
178 : : /*
179 : : * Local copy of SharedPromoteIsTriggered variable. False actually means "not
180 : : * known, need to check the shared state".
181 : : */
182 : : static bool LocalPromoteIsTriggered = false;
183 : :
184 : : /* Has the recovery code requested a walreceiver wakeup? */
185 : : static bool doRequestWalReceiverReply;
186 : :
187 : : /* XLogReader object used to parse the WAL records */
188 : : static XLogReaderState *xlogreader = NULL;
189 : :
190 : : /* XLogPrefetcher object used to consume WAL records with read-ahead */
191 : : static XLogPrefetcher *xlogprefetcher = NULL;
192 : :
193 : : /* Parameters passed down from ReadRecord to the XLogPageRead callback. InitWalRecovery() allocates one zeroed instance and hands it to XLogReaderAllocate() as the reader private data. */
194 : : typedef struct XLogPageReadPrivate
195 : : {
196 : : int emode; /* mirrors the emode argument of ReadRecord; presumably an ereport level -- confirm */
197 : : bool fetching_ckpt; /* are we fetching a checkpoint record? */
198 : : bool randAccess; /* NOTE(review): meaning not visible here; WaitForWALToBecomeAvailable takes a flag of the same name */
199 : : TimeLineID replayTLI; /* mirrors the replayTLI argument of ReadRecord */
200 : : } XLogPageReadPrivate;
201 : :
202 : : /* flag to tell XLogPageRead that we have started replaying */
203 : : static bool InRedo = false;
204 : :
205 : : /*
206 : : * Codes indicating where we got a WAL file from during recovery, or where
207 : : * to attempt to get one. The xlogSourceNames table lists one string per code, in this same order.
208 : : */
209 : : typedef enum
210 : : {
211 : : XLOG_FROM_ANY = 0, /* request to read WAL from any source */
212 : : XLOG_FROM_ARCHIVE, /* restored using restore_command */
213 : : XLOG_FROM_PG_WAL, /* existing file in pg_wal */
214 : : XLOG_FROM_STREAM, /* streamed from primary */
215 : : } XLogSource;
216 : :
217 : : /* human-readable names for XLogSources, for debugging output; entries follow the enum order (presumably indexed by XLogSource value -- keep both lists in sync) */
218 : : static const char *const xlogSourceNames[] = {"any", "archive", "pg_wal", "stream"};
219 : :
220 : : /*
221 : : * readFile is -1 or a kernel FD for the log file segment that's currently
222 : : * open for reading. readSegNo identifies the segment. readOff is the offset
223 : : * of the page just read, readLen indicates how much of it has been read into
224 : : * readBuf, and readSource indicates where we got the currently open file from.
225 : : *
226 : : * Note: we could use Reserve/ReleaseExternalFD to track consumption of this
227 : : * FD too (like for openLogFile in xlog.c); but it doesn't currently seem
228 : : * worthwhile, since the XLOG is not read by general-purpose sessions.
229 : : */
230 : : static int readFile = -1;
231 : : static XLogSegNo readSegNo = 0;
232 : : static uint32 readOff = 0;
233 : : static uint32 readLen = 0;
234 : : static XLogSource readSource = XLOG_FROM_ANY;
235 : :
236 : : /*
237 : : * Keeps track of which source we're currently reading from. This is
238 : : * different from readSource in that this is always set, even when we don't
239 : : * currently have a WAL file open. If lastSourceFailed is set, our last
240 : : * attempt to read from currentSource failed, and we should try another source
241 : : * next.
242 : : *
243 : : * pendingWalRcvRestart is set when a config change occurs that requires a
244 : : * walreceiver restart. This is only valid in XLOG_FROM_STREAM state.
245 : : */
246 : : static XLogSource currentSource = XLOG_FROM_ANY;
247 : : static bool lastSourceFailed = false;
248 : : static bool pendingWalRcvRestart = false;
249 : :
250 : : /*
251 : : * These variables track when we last obtained some WAL data to process,
252 : : * and where we got it from. (XLogReceiptSource is initially the same as
253 : : * readSource, but readSource gets reset to zero when we don't have data
254 : : * to process right now. It is also different from currentSource, which
255 : : * also changes when we try to read from a source and fail, while
256 : : * XLogReceiptSource tracks where we last successfully read some WAL.)
257 : : */
258 : : static TimestampTz XLogReceiptTime = 0;
259 : : static XLogSource XLogReceiptSource = XLOG_FROM_ANY;
260 : :
261 : : /* Local copy of WalRcv->flushedUpto */
262 : : static XLogRecPtr flushedUpto = 0;
263 : : static TimeLineID receiveTLI = 0;
264 : :
265 : : /*
266 : : * Copy of minRecoveryPoint and backupEndPoint from the control file.
267 : : *
268 : : * In order to reach consistency, we must replay the WAL up to
269 : : * minRecoveryPoint. If backupEndRequired is true, we must also reach
270 : : * backupEndPoint, or if it's invalid, an end-of-backup record corresponding
271 : : * to backupStartPoint.
272 : : *
273 : : * Note: In archive recovery, after consistency has been reached, the
274 : : * functions in xlog.c will start updating minRecoveryPoint in the control
275 : : * file. But this copy of minRecoveryPoint variable reflects the value at the
276 : : * beginning of recovery, and is *not* updated after consistency is reached.
277 : : */
278 : : static XLogRecPtr minRecoveryPoint;
279 : : static TimeLineID minRecoveryPointTLI;
280 : :
281 : : static XLogRecPtr backupStartPoint;
282 : : static XLogRecPtr backupEndPoint;
283 : : static bool backupEndRequired = false;
284 : :
285 : : /*
286 : : * Have we reached a consistent database state? In crash recovery, we have
287 : : * to replay all the WAL, so reachedConsistency is never set. During archive
288 : : * recovery, the database is consistent once minRecoveryPoint is reached.
289 : : *
290 : : * Consistent state means that the system is internally consistent, all
291 : : * the WAL has been replayed up to a certain point, and importantly, there
292 : : * is no trace of later actions on disk.
293 : : */
294 : : bool reachedConsistency = false;
295 : :
296 : : /* Buffers dedicated to consistency checks of size BLCKSZ */
297 : : static char *replay_image_masked = NULL;
298 : : static char *primary_image_masked = NULL;
299 : :
300 : :
301 : : /*
302 : : * Shared-memory state for WAL recovery. XLogRecoveryShmemInit() creates the single instance, zeroes it, and initializes info_lck, recoveryWakeupLatch and recoveryNotPausedCV.
303 : : */
304 : : typedef struct XLogRecoveryCtlData
305 : : {
306 : : /*
307 : : * SharedHotStandbyActive indicates if we allow hot standby queries to be
308 : : * run. Protected by info_lck.
309 : : */
310 : : bool SharedHotStandbyActive;
311 : :
312 : : /*
313 : : * SharedPromoteIsTriggered indicates if a standby promotion has been
314 : : * triggered. Protected by info_lck.
315 : : */
316 : : bool SharedPromoteIsTriggered;
317 : :
318 : : /*
319 : : * recoveryWakeupLatch is used to wake up the startup process to continue
320 : : * WAL replay, if it is waiting for WAL to arrive or promotion to be
321 : : * requested.
322 : : *
323 : : * Note that the startup process also uses another latch, its procLatch,
324 : : * to wait for recovery conflict. We could get rid of recoveryWakeupLatch
325 : : * and signal the startup process through its procLatch instead, which
326 : : * would comport better with possible generic signal handlers using that
327 : : * latch. But we should not do that, because the startup process does not
328 : : * expect to be woken up by the walreceiver process or the SIGHUP signal
329 : : * handler while it is waiting for recovery conflict. The separate latches,
330 : : * recoveryWakeupLatch and procLatch, should be used for inter-process
331 : : * communication for WAL replay and recovery conflict, respectively.
332 : : */
333 : : Latch recoveryWakeupLatch;
334 : :
335 : : /*
336 : : * Last record successfully replayed.
337 : : */
338 : : XLogRecPtr lastReplayedReadRecPtr; /* start position */
339 : : XLogRecPtr lastReplayedEndRecPtr; /* end+1 position */
340 : : TimeLineID lastReplayedTLI; /* timeline */
341 : :
342 : : /*
343 : : * When we're currently replaying a record, ie. in a redo function,
344 : : * replayEndRecPtr points to the end+1 of the record being replayed,
345 : : * otherwise it's equal to lastReplayedEndRecPtr.
346 : : */
347 : : XLogRecPtr replayEndRecPtr;
348 : : TimeLineID replayEndTLI; /* timeline; presumably that of replayEndRecPtr -- confirm */
349 : : /* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
350 : : TimestampTz recoveryLastXTime;
351 : :
352 : : /*
353 : : * timestamp of when we started replaying the current chunk of WAL data,
354 : : * only relevant for replication or archive recovery
355 : : */
356 : : TimestampTz currentChunkStartTime;
357 : : /* Recovery pause state, plus a condition variable that by its name relates to recovery not being paused (NOTE(review): confirm who waits on and who signals it) */
358 : : RecoveryPauseState recoveryPauseState;
359 : : ConditionVariable recoveryNotPausedCV;
360 : :
361 : : slock_t info_lck; /* locks shared variables shown above */
362 : : } XLogRecoveryCtlData;
363 : :
364 : : static XLogRecoveryCtlData *XLogRecoveryCtl = NULL;
365 : :
366 : : /*
367 : : * abortedRecPtr is the start pointer of a broken record at end of WAL when
368 : : * recovery completes; missingContrecPtr is the location of the first
369 : : * contrecord that went missing. See CreateOverwriteContrecordRecord for
370 : : * details.
371 : : */
372 : : static XLogRecPtr abortedRecPtr;
373 : : static XLogRecPtr missingContrecPtr;
374 : :
375 : : /*
376 : : * if recoveryStopsBefore/After returns true, it saves information of the stop
377 : : * point here
378 : : */
379 : : static TransactionId recoveryStopXid;
380 : : static TimestampTz recoveryStopTime;
381 : : static XLogRecPtr recoveryStopLSN;
382 : : static char recoveryStopName[MAXFNAMELEN];
383 : : static bool recoveryStopAfter;
384 : :
385 : : /* prototypes for local functions */
386 : : static void ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI);
387 : :
388 : : static void EnableStandbyMode(void);
389 : : static void readRecoverySignalFile(void);
390 : : static void validateRecoveryParameters(void);
391 : : static bool read_backup_label(XLogRecPtr *checkPointLoc,
392 : : TimeLineID *backupLabelTLI,
393 : : bool *backupEndRequired, bool *backupFromStandby);
394 : : static bool read_tablespace_map(List **tablespaces);
395 : :
396 : : static void xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI);
397 : : static void CheckRecoveryConsistency(void);
398 : : static void rm_redo_error_callback(void *arg);
399 : : #ifdef WAL_DEBUG
400 : : static void xlog_outrec(StringInfo buf, XLogReaderState *record);
401 : : #endif
402 : : static void xlog_block_info(StringInfo buf, XLogReaderState *record);
403 : : static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
404 : : TimeLineID prevTLI, TimeLineID replayTLI);
405 : : static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime);
406 : : static void verifyBackupPageConsistency(XLogReaderState *record);
407 : :
408 : : static bool recoveryStopsBefore(XLogReaderState *record);
409 : : static bool recoveryStopsAfter(XLogReaderState *record);
410 : : static char *getRecoveryStopReason(void);
411 : : static void recoveryPausesHere(bool endOfRecovery);
412 : : static bool recoveryApplyDelay(XLogReaderState *record);
413 : : static void ConfirmRecoveryPaused(void);
414 : :
415 : : static XLogRecord *ReadRecord(XLogPrefetcher *xlogprefetcher,
416 : : int emode, bool fetching_ckpt,
417 : : TimeLineID replayTLI);
418 : :
419 : : static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
420 : : int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
421 : : static XLogPageReadResult WaitForWALToBecomeAvailable(XLogRecPtr RecPtr,
422 : : bool randAccess,
423 : : bool fetching_ckpt,
424 : : XLogRecPtr tliRecPtr,
425 : : TimeLineID replayTLI,
426 : : XLogRecPtr replayLSN,
427 : : bool nonblocking);
428 : : static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
429 : : static XLogRecord *ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
430 : : XLogRecPtr RecPtr, TimeLineID replayTLI);
431 : : static bool rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN);
432 : : static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
433 : : XLogSource source, bool notfoundOk);
434 : : static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source);
435 : :
436 : : static bool CheckForStandbyTrigger(void);
437 : : static void SetPromoteIsTriggered(void);
438 : : static bool HotStandbyActiveInReplay(void);
439 : :
440 : : static void SetCurrentChunkStartTime(TimestampTz xtime);
441 : : static void SetLatestXTime(TimestampTz xtime);
442 : :
443 : : /*
444 : : * Shared memory for WAL recovery: XLogRecoveryShmemSize() returns the number of bytes needed, which is currently just one XLogRecoveryCtlData struct. XLogRecoveryShmemInit() passes this value to ShmemInitStruct().
445 : : */
446 : : Size
788 heikki.linnakangas@i 447 :CBC 2577 : XLogRecoveryShmemSize(void)
448 : : {
449 : : Size size;
450 : :
451 : : /* space for the XLogRecoveryCtl struct; nothing else is counted */
452 : 2577 : size = sizeof(XLogRecoveryCtlData);
453 : :
454 : 2577 : return size;
455 : : }
456 : : /* XLogRecoveryShmemInit: point XLogRecoveryCtl at the shared XLogRecoveryCtlData struct and, if it did not exist yet, zero it and initialize its lock, latch and condition variable. */
457 : : void
458 : 898 : XLogRecoveryShmemInit(void)
459 : : {
460 : : bool found; /* output flag filled in by ShmemInitStruct */
461 : : /* Obtain the struct registered under the name XLOG Recovery Ctl, sized by XLogRecoveryShmemSize(). Presumably found reports whether it already existed -- ShmemInitStruct is not visible here, confirm. */
462 : 898 : XLogRecoveryCtl = (XLogRecoveryCtlData *)
463 : 898 : ShmemInitStruct("XLOG Recovery Ctl", XLogRecoveryShmemSize(), &found);
464 [ - + ]: 898 : if (found)
788 heikki.linnakangas@i 465 :UBC 0 : return; /* leave the existing contents untouched */
788 heikki.linnakangas@i 466 :CBC 898 : memset(XLogRecoveryCtl, 0, sizeof(XLogRecoveryCtlData)); /* start from all-zero fields */
467 : : /* The embedded spinlock, latch and condition variable each need explicit initialization after the zeroing. */
468 : 898 : SpinLockInit(&XLogRecoveryCtl->info_lck);
469 : 898 : InitSharedLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
470 : 898 : ConditionVariableInit(&XLogRecoveryCtl->recoveryNotPausedCV);
471 : : }
472 : :
473 : : /*
474 : : * Switch into standby mode: set the StandbyMode flag and turn off the
475 : : * startup progress timeout. InitWalRecovery() calls this when StandbyModeRequested is set.
476 : : */
477 : : static void
433 rhaas@postgresql.org 478 : 135 : EnableStandbyMode(void)
479 : : {
480 : 135 : StandbyMode = true;
481 : :
482 : : /*
483 : : * To avoid server log bloat, recovery progress is not reported in a
484 : : * standby, as it will always be in recovery unless promoted. The
485 : : * startup progress timeout is disabled in standby mode to avoid calling
486 : : * startup_progress_timeout_handler() unnecessarily.
487 : : */
488 : 135 : disable_startup_progress_timeout();
489 : 135 : }
490 : :
491 : : /*
492 : : * Prepare the system for WAL recovery, if needed.
493 : : *
494 : : * This is called by StartupXLOG() which coordinates the server startup
495 : : * sequence. This function analyzes the control file and the backup label
496 : : * file, if any, and figures out whether we need to perform crash recovery or
497 : : * archive recovery, and how far we need to replay the WAL to reach a
498 : : * consistent state.
499 : : *
500 : : * This doesn't yet change the on-disk state, except for creating the symlinks
501 : : * from table space map file if any, and for fetching WAL files needed to find
502 : : * the checkpoint record. On entry, the caller has already read the control
503 : : * file into memory, and passes it as argument. This function updates it to
504 : : * reflect the recovery state, and the caller is expected to write it back to
505 : : * disk after initializing other subsystems, but before calling
506 : : * PerformWalRecovery().
507 : : *
508 : : * This initializes some global variables like ArchiveRecoveryRequested,
509 : : * StandbyModeRequested and InRecovery.
510 : : */
511 : : void
788 heikki.linnakangas@i 512 : 823 : InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
513 : : bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
514 : : {
515 : : XLogPageReadPrivate *private;
516 : : struct stat st;
517 : : bool wasShutdown;
518 : : XLogRecord *record;
519 : : DBState dbstate_at_startup;
520 : 823 : bool haveTblspcMap = false;
521 : 823 : bool haveBackupLabel = false;
522 : : CheckPoint checkPoint;
523 : 823 : bool backupFromStandby = false;
524 : :
525 : 823 : dbstate_at_startup = ControlFile->state;
526 : :
527 : : /*
528 : : * Initialize on the assumption we want to recover to the latest timeline
529 : : * that's active according to pg_control.
530 : : */
531 : 823 : if (ControlFile->minRecoveryPointTLI >
532 [ + + ]: 823 : ControlFile->checkPointCopy.ThisTimeLineID)
533 : 4 : recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
534 : : else
535 : 819 : recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
536 : :
537 : : /*
538 : : * Check for signal files, and if so set up state for offline recovery
539 : : */
540 : 823 : readRecoverySignalFile();
541 : 823 : validateRecoveryParameters();
542 : :
543 : : /*
544 : : * Take ownership of the wakeup latch if we're going to sleep during
545 : : * recovery, if required.
546 : : */
547 [ + + ]: 823 : if (ArchiveRecoveryRequested)
548 : 140 : OwnLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
549 : :
550 : : /*
551 : : * Set the WAL reading processor now, as it will be needed when reading
552 : : * the checkpoint record required (backup_label or not).
553 : : */
554 : 823 : private = palloc0(sizeof(XLogPageReadPrivate));
555 : 823 : xlogreader =
556 : 823 : XLogReaderAllocate(wal_segment_size, NULL,
557 : 823 : XL_ROUTINE(.page_read = &XLogPageRead,
558 : : .segment_open = NULL,
559 : : .segment_close = wal_segment_close),
560 : : private);
561 [ - + ]: 823 : if (!xlogreader)
788 heikki.linnakangas@i 562 [ # # ]:UBC 0 : ereport(ERROR,
563 : : (errcode(ERRCODE_OUT_OF_MEMORY),
564 : : errmsg("out of memory"),
565 : : errdetail("Failed while allocating a WAL reading processor.")));
788 heikki.linnakangas@i 566 :CBC 823 : xlogreader->system_identifier = ControlFile->system_identifier;
567 : :
568 : : /*
569 : : * Set the WAL decode buffer size. This limits how far ahead we can read
570 : : * in the WAL.
571 : : */
738 tmunro@postgresql.or 572 : 823 : XLogReaderSetDecodeBuffer(xlogreader, NULL, wal_decode_buffer_size);
573 : :
574 : : /* Create a WAL prefetcher. */
575 : 823 : xlogprefetcher = XLogPrefetcherAllocate(xlogreader);
576 : :
577 : : /*
578 : : * Allocate two page buffers dedicated to WAL consistency checks. We do
579 : : * it this way, rather than just making static arrays, for two reasons:
580 : : * (1) no need to waste the storage in most instantiations of the backend;
581 : : * (2) a static char array isn't guaranteed to have any particular
582 : : * alignment, whereas palloc() will provide MAXALIGN'd storage.
583 : : */
788 heikki.linnakangas@i 584 : 823 : replay_image_masked = (char *) palloc(BLCKSZ);
585 : 823 : primary_image_masked = (char *) palloc(BLCKSZ);
586 : :
587 : : /*
588 : : * Read the backup_label file. We want to run this part of the recovery
589 : : * process after checking for signal files and after performing validation
590 : : * of the recovery parameters.
591 : : */
592 [ + + ]: 823 : if (read_backup_label(&CheckPointLoc, &CheckPointTLI, &backupEndRequired,
593 : : &backupFromStandby))
594 : : {
595 : 95 : List *tablespaces = NIL;
596 : :
597 : : /*
598 : : * Archive recovery was requested, and thanks to the backup label
599 : : * file, we know how far we need to replay to reach consistency. Enter
600 : : * archive recovery directly.
601 : : */
602 : 95 : InArchiveRecovery = true;
603 [ + + ]: 95 : if (StandbyModeRequested)
433 rhaas@postgresql.org 604 : 85 : EnableStandbyMode();
605 : :
606 : : /*
607 : : * Omitting backup_label when creating a new replica, PITR node etc.
608 : : * unfortunately is a common cause of corruption. Logging that
609 : : * backup_label was used makes it a bit easier to exclude that as the
610 : : * cause of observed corruption.
611 : : *
612 : : * Do so before we try to read the checkpoint record (which can fail),
613 : : * as otherwise it can be hard to understand why a checkpoint other
614 : : * than ControlFile->checkPoint is used.
615 : : */
80 michael@paquier.xyz 616 [ + - ]: 95 : ereport(LOG,
617 : : (errmsg("starting backup recovery with redo LSN %X/%X, checkpoint LSN %X/%X, on timeline ID %u",
618 : : LSN_FORMAT_ARGS(RedoStartLSN),
619 : : LSN_FORMAT_ARGS(CheckPointLoc),
620 : : CheckPointTLI)));
621 : :
622 : : /*
623 : : * When a backup_label file is present, we want to roll forward from
624 : : * the checkpoint it identifies, rather than using pg_control.
625 : : */
634 fujii@postgresql.org 626 : 95 : record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
627 : : CheckPointTLI);
788 heikki.linnakangas@i 628 [ + - ]: 95 : if (record != NULL)
629 : : {
630 : 95 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
631 : 95 : wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
632 [ + + ]: 95 : ereport(DEBUG1,
633 : : (errmsg_internal("checkpoint record is at %X/%X",
634 : : LSN_FORMAT_ARGS(CheckPointLoc))));
635 : 95 : InRecovery = true; /* force recovery even if SHUTDOWNED */
636 : :
637 : : /*
638 : : * Make sure that REDO location exists. This may not be the case
639 : : * if there was a crash during an online backup, which left a
640 : : * backup_label around that references a WAL segment that's
641 : : * already been archived.
642 : : */
643 [ + - ]: 95 : if (checkPoint.redo < CheckPointLoc)
644 : : {
738 tmunro@postgresql.or 645 : 95 : XLogPrefetcherBeginRead(xlogprefetcher, checkPoint.redo);
646 [ - + ]: 95 : if (!ReadRecord(xlogprefetcher, LOG, false,
647 : : checkPoint.ThisTimeLineID))
788 heikki.linnakangas@i 648 [ # # ]:UBC 0 : ereport(FATAL,
649 : : (errmsg("could not find redo location %X/%X referenced by checkpoint record at %X/%X",
650 : : LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)),
651 : : errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
652 : : "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
653 : : "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
654 : : DataDir, DataDir, DataDir, DataDir)));
655 : : }
656 : : }
657 : : else
658 : : {
659 [ # # ]: 0 : ereport(FATAL,
660 : : (errmsg("could not locate required checkpoint record at %X/%X",
661 : : LSN_FORMAT_ARGS(CheckPointLoc)),
662 : : errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
663 : : "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
664 : : "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
665 : : DataDir, DataDir, DataDir, DataDir)));
666 : : wasShutdown = false; /* keep compiler quiet */
667 : : }
668 : :
669 : : /* Read the tablespace_map file if present and create symlinks. */
788 heikki.linnakangas@i 670 [ + + ]:CBC 95 : if (read_tablespace_map(&tablespaces))
671 : : {
672 : : ListCell *lc;
673 : :
674 [ + - + + : 2 : foreach(lc, tablespaces)
+ + ]
675 : : {
676 : 1 : tablespaceinfo *ti = lfirst(lc);
677 : : char *linkloc;
678 : :
174 rhaas@postgresql.org 679 :GNC 1 : linkloc = psprintf("pg_tblspc/%u", ti->oid);
680 : :
681 : : /*
682 : : * Remove the existing symlink if any and Create the symlink
683 : : * under PGDATA.
684 : : */
788 heikki.linnakangas@i 685 :CBC 1 : remove_tablespace_symlink(linkloc);
686 : :
687 [ - + ]: 1 : if (symlink(ti->path, linkloc) < 0)
788 heikki.linnakangas@i 688 [ # # ]:UBC 0 : ereport(ERROR,
689 : : (errcode_for_file_access(),
690 : : errmsg("could not create symbolic link \"%s\": %m",
691 : : linkloc)));
692 : :
788 heikki.linnakangas@i 693 :CBC 1 : pfree(ti->path);
694 : 1 : pfree(ti);
695 : : }
696 : :
697 : : /* tell the caller to delete it later */
698 : 1 : haveTblspcMap = true;
699 : : }
700 : :
701 : : /* tell the caller to delete it later */
702 : 95 : haveBackupLabel = true;
703 : : }
704 : : else
705 : : {
706 : : /* No backup_label file has been found if we are here. */
707 : :
708 : : /*
709 : : * If tablespace_map file is present without backup_label file, there
710 : : * is no use of such file. There is no harm in retaining it, but it
711 : : * is better to get rid of the map file so that we don't have any
712 : : * redundant file in data directory and it will avoid any sort of
713 : : * confusion. It seems prudent though to just rename the file out of
714 : : * the way rather than delete it completely, also we ignore any error
715 : : * that occurs in rename operation as even if map file is present
716 : : * without backup_label file, it is harmless.
717 : : */
718 [ + + ]: 728 : if (stat(TABLESPACE_MAP, &st) == 0)
719 : : {
720 : 1 : unlink(TABLESPACE_MAP_OLD);
721 [ + - ]: 1 : if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
722 [ + - ]: 1 : ereport(LOG,
723 : : (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
724 : : TABLESPACE_MAP, BACKUP_LABEL_FILE),
725 : : errdetail("File \"%s\" was renamed to \"%s\".",
726 : : TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
727 : : else
788 heikki.linnakangas@i 728 [ # # ]:UBC 0 : ereport(LOG,
729 : : (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
730 : : TABLESPACE_MAP, BACKUP_LABEL_FILE),
731 : : errdetail("Could not rename file \"%s\" to \"%s\": %m.",
732 : : TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
733 : : }
734 : :
735 : : /*
736 : : * It's possible that archive recovery was requested, but we don't
737 : : * know how far we need to replay the WAL before we reach consistency.
738 : : * This can happen for example if a base backup is taken from a
739 : : * running server using an atomic filesystem snapshot, without calling
740 : : * pg_backup_start/stop. Or if you just kill a running primary server
741 : : * and put it into archive recovery by creating a recovery signal
742 : : * file.
743 : : *
744 : : * Our strategy in that case is to perform crash recovery first,
745 : : * replaying all the WAL present in pg_wal, and only enter archive
746 : : * recovery after that.
747 : : *
748 : : * But usually we already know how far we need to replay the WAL (up
749 : : * to minRecoveryPoint, up to backupEndPoint, or until we see an
750 : : * end-of-backup record), and we can enter archive recovery directly.
751 : : */
788 heikki.linnakangas@i 752 [ + + ]:CBC 728 : if (ArchiveRecoveryRequested &&
753 [ + + ]: 50 : (ControlFile->minRecoveryPoint != InvalidXLogRecPtr ||
754 [ + - ]: 14 : ControlFile->backupEndRequired ||
755 [ + - ]: 14 : ControlFile->backupEndPoint != InvalidXLogRecPtr ||
756 [ + + ]: 14 : ControlFile->state == DB_SHUTDOWNED))
757 : : {
758 : 46 : InArchiveRecovery = true;
759 [ + - ]: 46 : if (StandbyModeRequested)
433 rhaas@postgresql.org 760 : 46 : EnableStandbyMode();
761 : : }
762 : :
763 : : /*
764 : : * For the same reason as when starting up with backup_label present,
765 : : * emit a log message when we continue initializing from a base
766 : : * backup.
767 : : */
80 michael@paquier.xyz 768 [ - + ]: 728 : if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
80 michael@paquier.xyz 769 [ # # ]:UBC 0 : ereport(LOG,
770 : : (errmsg("restarting backup recovery with redo LSN %X/%X",
771 : : LSN_FORMAT_ARGS(ControlFile->backupStartPoint))));
772 : :
773 : : /* Get the last valid checkpoint record. */
788 heikki.linnakangas@i 774 :CBC 728 : CheckPointLoc = ControlFile->checkPoint;
775 : 728 : CheckPointTLI = ControlFile->checkPointCopy.ThisTimeLineID;
776 : 728 : RedoStartLSN = ControlFile->checkPointCopy.redo;
777 : 728 : RedoStartTLI = ControlFile->checkPointCopy.ThisTimeLineID;
634 fujii@postgresql.org 778 : 728 : record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
779 : : CheckPointTLI);
788 heikki.linnakangas@i 780 [ + - ]: 728 : if (record != NULL)
781 : : {
782 [ + + ]: 728 : ereport(DEBUG1,
783 : : (errmsg_internal("checkpoint record is at %X/%X",
784 : : LSN_FORMAT_ARGS(CheckPointLoc))));
785 : : }
786 : : else
787 : : {
788 : : /*
789 : : * We used to attempt to go back to a secondary checkpoint record
790 : : * here, but only when not in standby mode. We now just fail if we
791 : : * can't read the last checkpoint because this allows us to
792 : : * simplify processing around checkpoints.
793 : : */
788 heikki.linnakangas@i 794 [ # # ]:UBC 0 : ereport(PANIC,
795 : : (errmsg("could not locate a valid checkpoint record at %X/%X",
796 : : LSN_FORMAT_ARGS(CheckPointLoc))));
797 : : }
788 heikki.linnakangas@i 798 :CBC 728 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
799 : 728 : wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
800 : : }
801 : :
167 michael@paquier.xyz 802 [ + + ]:GNC 823 : if (ArchiveRecoveryRequested)
803 : : {
804 [ + + ]: 140 : if (StandbyModeRequested)
805 [ + - ]: 135 : ereport(LOG,
806 : : (errmsg("entering standby mode")));
807 [ - + ]: 5 : else if (recoveryTarget == RECOVERY_TARGET_XID)
167 michael@paquier.xyz 808 [ # # ]:UNC 0 : ereport(LOG,
809 : : (errmsg("starting point-in-time recovery to XID %u",
810 : : recoveryTargetXid)));
167 michael@paquier.xyz 811 [ - + ]:GNC 5 : else if (recoveryTarget == RECOVERY_TARGET_TIME)
167 michael@paquier.xyz 812 [ # # ]:UNC 0 : ereport(LOG,
813 : : (errmsg("starting point-in-time recovery to %s",
814 : : timestamptz_to_str(recoveryTargetTime))));
167 michael@paquier.xyz 815 [ + + ]:GNC 5 : else if (recoveryTarget == RECOVERY_TARGET_NAME)
816 [ + - ]: 3 : ereport(LOG,
817 : : (errmsg("starting point-in-time recovery to \"%s\"",
818 : : recoveryTargetName)));
819 [ - + ]: 2 : else if (recoveryTarget == RECOVERY_TARGET_LSN)
167 michael@paquier.xyz 820 [ # # ]:UNC 0 : ereport(LOG,
821 : : (errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
822 : : LSN_FORMAT_ARGS(recoveryTargetLSN))));
167 michael@paquier.xyz 823 [ - + ]:GNC 2 : else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
167 michael@paquier.xyz 824 [ # # ]:UNC 0 : ereport(LOG,
825 : : (errmsg("starting point-in-time recovery to earliest consistent point")));
826 : : else
167 michael@paquier.xyz 827 [ + - ]:GNC 2 : ereport(LOG,
828 : : (errmsg("starting archive recovery")));
829 : : }
830 : :
831 : : /*
832 : : * If the location of the checkpoint record is not on the expected
833 : : * timeline in the history of the requested timeline, we cannot proceed:
834 : : * the backup is not part of the history of the requested timeline.
835 : : */
788 heikki.linnakangas@i 836 [ - + ]:CBC 823 : Assert(expectedTLEs); /* was initialized by reading checkpoint
837 : : * record */
838 [ - + ]: 823 : if (tliOfPointInHistory(CheckPointLoc, expectedTLEs) !=
839 : : CheckPointTLI)
840 : : {
841 : : XLogRecPtr switchpoint;
842 : :
843 : : /*
844 : : * tliSwitchPoint will throw an error if the checkpoint's timeline is
845 : : * not in expectedTLEs at all.
846 : : */
788 heikki.linnakangas@i 847 :UBC 0 : switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL);
848 [ # # ]: 0 : ereport(FATAL,
849 : : (errmsg("requested timeline %u is not a child of this server's history",
850 : : recoveryTargetTLI),
851 : : errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
852 : : LSN_FORMAT_ARGS(ControlFile->checkPoint),
853 : : ControlFile->checkPointCopy.ThisTimeLineID,
854 : : LSN_FORMAT_ARGS(switchpoint))));
855 : : }
856 : :
857 : : /*
858 : : * The min recovery point should be part of the requested timeline's
859 : : * history, too.
860 : : */
788 heikki.linnakangas@i 861 [ + + ]:CBC 823 : if (!XLogRecPtrIsInvalid(ControlFile->minRecoveryPoint) &&
862 : 55 : tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
863 [ - + ]: 55 : ControlFile->minRecoveryPointTLI)
788 heikki.linnakangas@i 864 [ # # ]:UBC 0 : ereport(FATAL,
865 : : (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
866 : : recoveryTargetTLI,
867 : : LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
868 : : ControlFile->minRecoveryPointTLI)));
869 : :
788 heikki.linnakangas@i 870 [ + + + + ]:CBC 823 : ereport(DEBUG1,
871 : : (errmsg_internal("redo record is at %X/%X; shutdown %s",
872 : : LSN_FORMAT_ARGS(checkPoint.redo),
873 : : wasShutdown ? "true" : "false")));
874 [ + + ]: 823 : ereport(DEBUG1,
875 : : (errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
876 : : U64FromFullTransactionId(checkPoint.nextXid),
877 : : checkPoint.nextOid)));
878 [ + + ]: 823 : ereport(DEBUG1,
879 : : (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
880 : : checkPoint.nextMulti, checkPoint.nextMultiOffset)));
881 [ + + ]: 823 : ereport(DEBUG1,
882 : : (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
883 : : checkPoint.oldestXid, checkPoint.oldestXidDB)));
884 [ + + ]: 823 : ereport(DEBUG1,
885 : : (errmsg_internal("oldest MultiXactId: %u, in database %u",
886 : : checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
887 [ + + ]: 823 : ereport(DEBUG1,
888 : : (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u",
889 : : checkPoint.oldestCommitTsXid,
890 : : checkPoint.newestCommitTsXid)));
891 [ - + ]: 823 : if (!TransactionIdIsNormal(XidFromFullTransactionId(checkPoint.nextXid)))
788 heikki.linnakangas@i 892 [ # # ]:UBC 0 : ereport(PANIC,
893 : : (errmsg("invalid next transaction ID")));
894 : :
895 : : /* sanity check */
788 heikki.linnakangas@i 896 [ - + ]:CBC 823 : if (checkPoint.redo > CheckPointLoc)
788 heikki.linnakangas@i 897 [ # # ]:UBC 0 : ereport(PANIC,
898 : : (errmsg("invalid redo in checkpoint record")));
899 : :
900 : : /*
901 : : * Check whether we need to force recovery from WAL. If it appears to
902 : : * have been a clean shutdown and we did not have a recovery signal file,
903 : : * then assume no recovery needed.
904 : : */
788 heikki.linnakangas@i 905 [ + + ]:CBC 823 : if (checkPoint.redo < CheckPointLoc)
906 : : {
907 [ - + ]: 143 : if (wasShutdown)
788 heikki.linnakangas@i 908 [ # # ]:UBC 0 : ereport(PANIC,
909 : : (errmsg("invalid redo record in shutdown checkpoint")));
788 heikki.linnakangas@i 910 :CBC 143 : InRecovery = true;
911 : : }
912 [ + + ]: 680 : else if (ControlFile->state != DB_SHUTDOWNED)
913 : 89 : InRecovery = true;
914 [ + + ]: 591 : else if (ArchiveRecoveryRequested)
915 : : {
916 : : /* force recovery due to presence of recovery signal file */
917 : 10 : InRecovery = true;
918 : : }
919 : :
920 : : /*
921 : : * If recovery is needed, update our in-memory copy of pg_control to show
922 : : * that we are recovering and to show the selected checkpoint as the place
923 : : * we are starting from. We also mark pg_control with any minimum recovery
924 : : * stop point obtained from a backup history file.
925 : : *
926 : : * We don't write the changes to disk yet, though. Only do that after
927 : : * initializing various subsystems.
928 : : */
929 [ + + ]: 823 : if (InRecovery)
930 : : {
931 [ + + ]: 242 : if (InArchiveRecovery)
932 : : {
933 : 141 : ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
934 : : }
935 : : else
936 : : {
937 [ + - ]: 101 : ereport(LOG,
938 : : (errmsg("database system was not properly shut down; "
939 : : "automatic recovery in progress")));
940 [ + + ]: 101 : if (recoveryTargetTLI > ControlFile->checkPointCopy.ThisTimeLineID)
941 [ + - ]: 2 : ereport(LOG,
942 : : (errmsg("crash recovery starts in timeline %u "
943 : : "and has target timeline %u",
944 : : ControlFile->checkPointCopy.ThisTimeLineID,
945 : : recoveryTargetTLI)));
946 : 101 : ControlFile->state = DB_IN_CRASH_RECOVERY;
947 : : }
948 : 242 : ControlFile->checkPoint = CheckPointLoc;
949 : 242 : ControlFile->checkPointCopy = checkPoint;
950 [ + + ]: 242 : if (InArchiveRecovery)
951 : : {
952 : : /* initialize minRecoveryPoint if not set yet */
953 [ + + ]: 141 : if (ControlFile->minRecoveryPoint < checkPoint.redo)
954 : : {
955 : 88 : ControlFile->minRecoveryPoint = checkPoint.redo;
956 : 88 : ControlFile->minRecoveryPointTLI = checkPoint.ThisTimeLineID;
957 : : }
958 : : }
959 : :
960 : : /*
961 : : * Set backupStartPoint if we're starting recovery from a base backup.
962 : : *
963 : : * Also set backupEndPoint and use minRecoveryPoint as the backup end
964 : : * location if we're starting recovery from a base backup which was
965 : : * taken from a standby. In this case, the database system status in
966 : : * pg_control must indicate that the database was already in recovery.
967 : : * Usually that will be DB_IN_ARCHIVE_RECOVERY but also can be
968 : : * DB_SHUTDOWNED_IN_RECOVERY if recovery previously was interrupted
969 : : * before reaching this point; e.g. because restore_command or
970 : : * primary_conninfo were faulty.
971 : : *
972 : : * Any other state indicates that the backup somehow became corrupted
973 : : * and we can't sensibly continue with recovery.
974 : : */
975 [ + + ]: 242 : if (haveBackupLabel)
976 : : {
977 : 95 : ControlFile->backupStartPoint = checkPoint.redo;
978 : 95 : ControlFile->backupEndRequired = backupEndRequired;
979 : :
980 [ + + ]: 95 : if (backupFromStandby)
981 : : {
982 [ - + - - ]: 17 : if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY &&
983 : : dbstate_at_startup != DB_SHUTDOWNED_IN_RECOVERY)
788 heikki.linnakangas@i 984 [ # # ]:UBC 0 : ereport(FATAL,
985 : : (errmsg("backup_label contains data inconsistent with control file"),
986 : : errhint("This means that the backup is corrupted and you will "
987 : : "have to use another backup for recovery.")));
788 heikki.linnakangas@i 988 :CBC 17 : ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
989 : : }
990 : : }
991 : : }
992 : :
993 : : /* remember these, so that we know when we have reached consistency */
994 : 823 : backupStartPoint = ControlFile->backupStartPoint;
995 : 823 : backupEndRequired = ControlFile->backupEndRequired;
996 : 823 : backupEndPoint = ControlFile->backupEndPoint;
997 [ + + ]: 823 : if (InArchiveRecovery)
998 : : {
999 : 141 : minRecoveryPoint = ControlFile->minRecoveryPoint;
1000 : 141 : minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
1001 : : }
1002 : : else
1003 : : {
1004 : 682 : minRecoveryPoint = InvalidXLogRecPtr;
1005 : 682 : minRecoveryPointTLI = 0;
1006 : : }
1007 : :
1008 : : /*
1009 : : * Start recovery assuming that the final record isn't lost.
1010 : : */
1011 : 823 : abortedRecPtr = InvalidXLogRecPtr;
1012 : 823 : missingContrecPtr = InvalidXLogRecPtr;
1013 : :
1014 : 823 : *wasShutdown_ptr = wasShutdown;
1015 : 823 : *haveBackupLabel_ptr = haveBackupLabel;
1016 : 823 : *haveTblspcMap_ptr = haveTblspcMap;
1017 : 823 : }
1018 : :
1019 : : /*
1020 : : * See if there are any recovery signal files and if so, set state for
1021 : : * recovery.
1022 : : *
1023 : : * See if there is a recovery command file (recovery.conf), and if so
1024 : : * throw an ERROR since as of PG12 we no longer recognize that.
1025 : : */
1026 : : static void
1027 : 823 : readRecoverySignalFile(void)
1028 : : {
1029 : : struct stat stat_buf;
1030 : :
1031 [ + + ]: 823 : if (IsBootstrapProcessingMode())
1032 : 683 : return;
1033 : :
1034 : : /*
1035 : : * Check for old recovery API file: recovery.conf
1036 : : */
1037 [ - + ]: 784 : if (stat(RECOVERY_COMMAND_FILE, &stat_buf) == 0)
788 heikki.linnakangas@i 1038 [ # # ]:UBC 0 : ereport(FATAL,
1039 : : (errcode_for_file_access(),
1040 : : errmsg("using recovery command file \"%s\" is not supported",
1041 : : RECOVERY_COMMAND_FILE)));
1042 : :
1043 : : /*
1044 : : * Remove unused .done file, if present. Ignore if absent.
1045 : : */
788 heikki.linnakangas@i 1046 :CBC 784 : unlink(RECOVERY_COMMAND_DONE);
1047 : :
1048 : : /*
1049 : : * Check for recovery signal files and if found, fsync them since they
1050 : : * represent server state information. We don't sweat too much about the
1051 : : * possibility of fsync failure, however.
1052 : : *
1053 : : * If present, standby signal file takes precedence. If neither is present
1054 : : * then we won't enter archive recovery.
1055 : : */
1056 [ + + ]: 784 : if (stat(STANDBY_SIGNAL_FILE, &stat_buf) == 0)
1057 : : {
1058 : : int fd;
1059 : :
1060 : 135 : fd = BasicOpenFilePerm(STANDBY_SIGNAL_FILE, O_RDWR | PG_BINARY,
1061 : : S_IRUSR | S_IWUSR);
1062 [ + - ]: 135 : if (fd >= 0)
1063 : : {
1064 : 135 : (void) pg_fsync(fd);
1065 : 135 : close(fd);
1066 : : }
1067 : 135 : standby_signal_file_found = true;
1068 : : }
1069 [ + + ]: 649 : else if (stat(RECOVERY_SIGNAL_FILE, &stat_buf) == 0)
1070 : : {
1071 : : int fd;
1072 : :
1073 : 5 : fd = BasicOpenFilePerm(RECOVERY_SIGNAL_FILE, O_RDWR | PG_BINARY,
1074 : : S_IRUSR | S_IWUSR);
1075 [ + - ]: 5 : if (fd >= 0)
1076 : : {
1077 : 5 : (void) pg_fsync(fd);
1078 : 5 : close(fd);
1079 : : }
1080 : 5 : recovery_signal_file_found = true;
1081 : : }
1082 : :
1083 : 784 : StandbyModeRequested = false;
1084 : 784 : ArchiveRecoveryRequested = false;
1085 [ + + ]: 784 : if (standby_signal_file_found)
1086 : : {
1087 : 135 : StandbyModeRequested = true;
1088 : 135 : ArchiveRecoveryRequested = true;
1089 : : }
1090 [ + + ]: 649 : else if (recovery_signal_file_found)
1091 : : {
1092 : 5 : StandbyModeRequested = false;
1093 : 5 : ArchiveRecoveryRequested = true;
1094 : : }
1095 : : else
1096 : 644 : return;
1097 : :
1098 : : /*
1099 : : * We don't support standby mode in standalone backends; that requires
1100 : : * other processes such as the WAL receiver to be alive.
1101 : : */
1102 [ + + - + ]: 140 : if (StandbyModeRequested && !IsUnderPostmaster)
788 heikki.linnakangas@i 1103 [ # # ]:UBC 0 : ereport(FATAL,
1104 : : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1105 : : errmsg("standby mode is not supported by single-user servers")));
1106 : : }
1107 : :
1108 : : static void
788 heikki.linnakangas@i 1109 :CBC 823 : validateRecoveryParameters(void)
1110 : : {
1111 [ + + ]: 823 : if (!ArchiveRecoveryRequested)
1112 : 683 : return;
1113 : :
1114 : : /*
1115 : : * Check for compulsory parameters
1116 : : */
1117 [ + + ]: 140 : if (StandbyModeRequested)
1118 : : {
1119 [ + - + + ]: 135 : if ((PrimaryConnInfo == NULL || strcmp(PrimaryConnInfo, "") == 0) &&
1120 [ + - + + ]: 18 : (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0))
1121 [ + - ]: 2 : ereport(WARNING,
1122 : : (errmsg("specified neither primary_conninfo nor restore_command"),
1123 : : errhint("The database server will regularly poll the pg_wal subdirectory to check for files placed there.")));
1124 : : }
1125 : : else
1126 : : {
1127 [ + - ]: 5 : if (recoveryRestoreCommand == NULL ||
1128 [ - + ]: 5 : strcmp(recoveryRestoreCommand, "") == 0)
788 heikki.linnakangas@i 1129 [ # # ]:UBC 0 : ereport(FATAL,
1130 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1131 : : errmsg("must specify restore_command when standby mode is not enabled")));
1132 : : }
1133 : :
1134 : : /*
1135 : : * Override any inconsistent requests. Note that this is a change of
1136 : : * behaviour in 9.5; prior to this we simply ignored a request to pause if
1137 : : * hot_standby = off, which was surprising behaviour.
1138 : : */
788 heikki.linnakangas@i 1139 [ + + ]:CBC 140 : if (recoveryTargetAction == RECOVERY_TARGET_ACTION_PAUSE &&
1140 [ + + ]: 133 : !EnableHotStandby)
1141 : 2 : recoveryTargetAction = RECOVERY_TARGET_ACTION_SHUTDOWN;
1142 : :
1143 : : /*
1144 : : * Final parsing of recovery_target_time string; see also
1145 : : * check_recovery_target_time().
1146 : : */
1147 [ + + ]: 140 : if (recoveryTarget == RECOVERY_TARGET_TIME)
1148 : : {
1149 : 2 : recoveryTargetTime = DatumGetTimestampTz(DirectFunctionCall3(timestamptz_in,
1150 : : CStringGetDatum(recovery_target_time_string),
1151 : : ObjectIdGetDatum(InvalidOid),
1152 : : Int32GetDatum(-1)));
1153 : : }
1154 : :
1155 : : /*
1156 : : * If user specified recovery_target_timeline, validate it or compute the
1157 : : * "latest" value. We can't do this until after we've gotten the restore
1158 : : * command and set InArchiveRecovery, because we need to fetch timeline
1159 : : * history files from the archive.
1160 : : */
1161 [ - + ]: 140 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_NUMERIC)
1162 : : {
788 heikki.linnakangas@i 1163 :UBC 0 : TimeLineID rtli = recoveryTargetTLIRequested;
1164 : :
1165 : : /* Timeline 1 does not have a history file, all else should */
1166 [ # # # # ]: 0 : if (rtli != 1 && !existsTimeLineHistory(rtli))
1167 [ # # ]: 0 : ereport(FATAL,
1168 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1169 : : errmsg("recovery target timeline %u does not exist",
1170 : : rtli)));
1171 : 0 : recoveryTargetTLI = rtli;
1172 : : }
788 heikki.linnakangas@i 1173 [ + - ]:CBC 140 : else if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
1174 : : {
1175 : : /* We start the "latest" search from pg_control's timeline */
1176 : 140 : recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
1177 : : }
1178 : : else
1179 : : {
1180 : : /*
1181 : : * else we just use the recoveryTargetTLI as already read from
1182 : : * ControlFile
1183 : : */
788 heikki.linnakangas@i 1184 [ # # ]:UBC 0 : Assert(recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_CONTROLFILE);
1185 : : }
1186 : : }
1187 : :
1188 : : /*
1189 : : * read_backup_label: check to see if a backup_label file is present
1190 : : *
1191 : : * If we see a backup_label during recovery, we assume that we are recovering
1192 : : * from a backup dump file, and we therefore roll forward from the checkpoint
1193 : : * identified by the label file, NOT what pg_control says. This avoids the
1194 : : * problem that pg_control might have been archived one or more checkpoints
1195 : : * later than the start of the dump, and so if we rely on it as the start
1196 : : * point, we will fail to restore a consistent database state.
1197 : : *
1198 : : * Returns true if a backup_label was found (and fills the checkpoint
1199 : : * location and TLI into *checkPointLoc and *backupLabelTLI, respectively);
1200 : : * returns false if not. If this backup_label came from a streamed backup,
1201 : : * *backupEndRequired is set to true. If this backup_label was created during
1202 : : * recovery, *backupFromStandby is set to true.
1203 : : *
1204 : : * Also sets the global variables RedoStartLSN and RedoStartTLI with the LSN
1205 : : * and TLI read from the backup file.
1206 : : */
1207 : : static bool
788 heikki.linnakangas@i 1208 :CBC 823 : read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
1209 : : bool *backupEndRequired, bool *backupFromStandby)
1210 : : {
1211 : : char startxlogfilename[MAXFNAMELEN];
1212 : : TimeLineID tli_from_walseg,
1213 : : tli_from_file;
1214 : : FILE *lfp;
1215 : : char ch;
1216 : : char backuptype[20];
1217 : : char backupfrom[20];
1218 : : char backuplabel[MAXPGPATH];
1219 : : char backuptime[128];
1220 : : uint32 hi,
1221 : : lo;
1222 : :
1223 : : /* suppress possible uninitialized-variable warnings */
1224 : 823 : *checkPointLoc = InvalidXLogRecPtr;
1225 : 823 : *backupLabelTLI = 0;
1226 : 823 : *backupEndRequired = false;
1227 : 823 : *backupFromStandby = false;
1228 : :
1229 : : /*
1230 : : * See if label file is present
1231 : : */
1232 : 823 : lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
1233 [ + + ]: 823 : if (!lfp)
1234 : : {
1235 [ - + ]: 728 : if (errno != ENOENT)
788 heikki.linnakangas@i 1236 [ # # ]:UBC 0 : ereport(FATAL,
1237 : : (errcode_for_file_access(),
1238 : : errmsg("could not read file \"%s\": %m",
1239 : : BACKUP_LABEL_FILE)));
788 heikki.linnakangas@i 1240 :CBC 728 : return false; /* it's not there, all is fine */
1241 : : }
1242 : :
1243 : : /*
1244 : : * Read and parse the START WAL LOCATION and CHECKPOINT lines (this code
1245 : : * is pretty crude, but we are not expecting any variability in the file
1246 : : * format).
1247 : : */
1248 [ + - ]: 95 : if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c",
1249 [ - + ]: 95 : &hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n')
788 heikki.linnakangas@i 1250 [ # # ]:UBC 0 : ereport(FATAL,
1251 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1252 : : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
788 heikki.linnakangas@i 1253 :CBC 95 : RedoStartLSN = ((uint64) hi) << 32 | lo;
1254 : 95 : RedoStartTLI = tli_from_walseg;
1255 [ + - ]: 95 : if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c",
1256 [ - + ]: 95 : &hi, &lo, &ch) != 3 || ch != '\n')
788 heikki.linnakangas@i 1257 [ # # ]:UBC 0 : ereport(FATAL,
1258 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1259 : : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
788 heikki.linnakangas@i 1260 :CBC 95 : *checkPointLoc = ((uint64) hi) << 32 | lo;
1261 : 95 : *backupLabelTLI = tli_from_walseg;
1262 : :
1263 : : /*
1264 : : * BACKUP METHOD lets us know if this was a typical backup ("streamed",
1265 : : * which could mean either pg_basebackup or the pg_backup_start/stop
1266 : : * method was used) or if this label came from somewhere else (the only
1267 : : * other option today being from pg_rewind). If this was a streamed
1268 : : * backup then we know that we need to play through until we get to the
1269 : : * end of the WAL which was generated during the backup (at which point we
1270 : : * will have reached consistency and backupEndRequired will be reset to be
1271 : : * false).
1272 : : */
1273 [ + - ]: 95 : if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
1274 : : {
1275 [ + + ]: 95 : if (strcmp(backuptype, "streamed") == 0)
1276 : 84 : *backupEndRequired = true;
1277 : : }
1278 : :
1279 : : /*
1280 : : * BACKUP FROM lets us know if this was from a primary or a standby. If
1281 : : * it was from a standby, we'll double-check that the control file state
1282 : : * matches that of a standby.
1283 : : */
1284 [ + - ]: 95 : if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1)
1285 : : {
1286 [ + + ]: 95 : if (strcmp(backupfrom, "standby") == 0)
1287 : 17 : *backupFromStandby = true;
1288 : : }
1289 : :
1290 : : /*
1291 : : * Parse START TIME and LABEL. Those are not mandatory fields for recovery
1292 : : * but checking for their presence is useful for debugging and the next
1293 : : * sanity checks. Cope also with the fact that the result buffers have a
1294 : : * pre-allocated size, hence if the backup_label file has been generated
1295 : : * with strings longer than the maximum assumed here an incorrect parsing
1296 : : * happens. That's fine as only minor consistency checks are done
1297 : : * afterwards.
1298 : : */
1299 [ + - ]: 95 : if (fscanf(lfp, "START TIME: %127[^\n]\n", backuptime) == 1)
1300 [ + + ]: 95 : ereport(DEBUG1,
1301 : : (errmsg_internal("backup time %s in file \"%s\"",
1302 : : backuptime, BACKUP_LABEL_FILE)));
1303 : :
1304 [ + + ]: 95 : if (fscanf(lfp, "LABEL: %1023[^\n]\n", backuplabel) == 1)
1305 [ + + ]: 84 : ereport(DEBUG1,
1306 : : (errmsg_internal("backup label %s in file \"%s\"",
1307 : : backuplabel, BACKUP_LABEL_FILE)));
1308 : :
1309 : : /*
1310 : : * START TIMELINE is new as of 11. Its parsing is not mandatory, still use
1311 : : * it as a sanity check if present.
1312 : : */
1313 [ + + ]: 95 : if (fscanf(lfp, "START TIMELINE: %u\n", &tli_from_file) == 1)
1314 : : {
1315 [ - + ]: 84 : if (tli_from_walseg != tli_from_file)
788 heikki.linnakangas@i 1316 [ # # ]:UBC 0 : ereport(FATAL,
1317 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1318 : : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE),
1319 : : errdetail("Timeline ID parsed is %u, but expected %u.",
1320 : : tli_from_file, tli_from_walseg)));
1321 : :
788 heikki.linnakangas@i 1322 [ + + ]:CBC 84 : ereport(DEBUG1,
1323 : : (errmsg_internal("backup timeline %u in file \"%s\"",
1324 : : tli_from_file, BACKUP_LABEL_FILE)));
1325 : : }
1326 : :
116 rhaas@postgresql.org 1327 [ - + ]:GNC 95 : if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%X\n", &hi, &lo) > 0)
116 rhaas@postgresql.org 1328 [ # # ]:UNC 0 : ereport(FATAL,
1329 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1330 : : errmsg("this is an incremental backup, not a data directory"),
1331 : : errhint("Use pg_combinebackup to reconstruct a valid data directory.")));
1332 : :
788 heikki.linnakangas@i 1333 [ + - - + ]:CBC 95 : if (ferror(lfp) || FreeFile(lfp))
788 heikki.linnakangas@i 1334 [ # # ]:UBC 0 : ereport(FATAL,
1335 : : (errcode_for_file_access(),
1336 : : errmsg("could not read file \"%s\": %m",
1337 : : BACKUP_LABEL_FILE)));
1338 : :
788 heikki.linnakangas@i 1339 :CBC 95 : return true;
1340 : : }
1341 : :
1342 : : /*
1343 : : * read_tablespace_map: check to see if a tablespace_map file is present
1344 : : *
1345 : : * If we see a tablespace_map file during recovery, we assume that we are
1346 : : * recovering from a backup dump file, and we therefore need to create symlinks
1347 : : * as per the information present in tablespace_map file.
1348 : : *
1349 : : * Returns true if a tablespace_map file was found (and fills *tablespaces
1350 : : * with a tablespaceinfo struct for each tablespace listed in the file);
1351 : : * returns false if not.
1352 : : */
1353 : : static bool
1354 : 95 : read_tablespace_map(List **tablespaces)
1355 : : {
1356 : : tablespaceinfo *ti;
1357 : : FILE *lfp;
1358 : : char str[MAXPGPATH];
1359 : : int ch,
1360 : : i,
1361 : : n;
1362 : : bool was_backslash;
1363 : :
1364 : : /*
1365 : : * See if tablespace_map file is present
1366 : : */
1367 : 95 : lfp = AllocateFile(TABLESPACE_MAP, "r");
1368 [ + + ]: 95 : if (!lfp)
1369 : : {
1370 [ - + ]: 94 : if (errno != ENOENT)
788 heikki.linnakangas@i 1371 [ # # ]:UBC 0 : ereport(FATAL,
1372 : : (errcode_for_file_access(),
1373 : : errmsg("could not read file \"%s\": %m",
1374 : : TABLESPACE_MAP)));
788 heikki.linnakangas@i 1375 :CBC 94 : return false; /* it's not there, all is fine */
1376 : : }
1377 : :
1378 : : /*
1379 : : * Read and parse the link name and path lines from tablespace_map file
1380 : : * (this code is pretty crude, but we are not expecting any variability in
1381 : : * the file format). De-escape any backslashes that were inserted.
1382 : : */
1383 : 1 : i = 0;
1384 : 1 : was_backslash = false;
1385 [ + + ]: 46 : while ((ch = fgetc(lfp)) != EOF)
1386 : : {
1387 [ + - + + : 45 : if (!was_backslash && (ch == '\n' || ch == '\r'))
- + ]
788 heikki.linnakangas@i 1388 :GIC 1 : {
1389 : : char *endp;
1390 : :
788 heikki.linnakangas@i 1391 [ - + ]:CBC 1 : if (i == 0)
788 heikki.linnakangas@i 1392 :UBC 0 : continue; /* \r immediately followed by \n */
1393 : :
1394 : : /*
1395 : : * The de-escaped line should contain an OID followed by exactly
1396 : : * one space followed by a path. The path might start with
1397 : : * spaces, so don't be too liberal about parsing.
1398 : : */
788 heikki.linnakangas@i 1399 :CBC 1 : str[i] = '\0';
1400 : 1 : n = 0;
1401 [ + - + + ]: 6 : while (str[n] && str[n] != ' ')
1402 : 5 : n++;
1403 [ + - - + ]: 1 : if (n < 1 || n >= i - 1)
788 heikki.linnakangas@i 1404 [ # # ]:UBC 0 : ereport(FATAL,
1405 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1406 : : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
788 heikki.linnakangas@i 1407 :CBC 1 : str[n++] = '\0';
1408 : :
1409 : 1 : ti = palloc0(sizeof(tablespaceinfo));
174 rhaas@postgresql.org 1410 :GNC 1 : errno = 0;
1411 : 1 : ti->oid = strtoul(str, &endp, 10);
1412 [ + - + - : 1 : if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
- + ]
174 rhaas@postgresql.org 1413 [ # # ]:UNC 0 : ereport(FATAL,
1414 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1415 : : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
788 heikki.linnakangas@i 1416 :CBC 1 : ti->path = pstrdup(str + n);
1417 : 1 : *tablespaces = lappend(*tablespaces, ti);
1418 : :
1419 : 1 : i = 0;
1420 : 1 : continue;
1421 : : }
1422 [ + - - + ]: 44 : else if (!was_backslash && ch == '\\')
788 heikki.linnakangas@i 1423 :UBC 0 : was_backslash = true;
1424 : : else
1425 : : {
788 heikki.linnakangas@i 1426 [ + - ]:CBC 44 : if (i < sizeof(str) - 1)
1427 : 44 : str[i++] = ch;
1428 : 44 : was_backslash = false;
1429 : : }
1430 : : }
1431 : :
1432 [ + - - + ]: 1 : if (i != 0 || was_backslash) /* last line not terminated? */
788 heikki.linnakangas@i 1433 [ # # ]:UBC 0 : ereport(FATAL,
1434 : : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1435 : : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
1436 : :
788 heikki.linnakangas@i 1437 [ + - - + ]:CBC 1 : if (ferror(lfp) || FreeFile(lfp))
788 heikki.linnakangas@i 1438 [ # # ]:UBC 0 : ereport(FATAL,
1439 : : (errcode_for_file_access(),
1440 : : errmsg("could not read file \"%s\": %m",
1441 : : TABLESPACE_MAP)));
1442 : :
788 heikki.linnakangas@i 1443 :CBC 1 : return true;
1444 : : }
1445 : :
1446 : : /*
1447 : : * Finish WAL recovery.
1448 : : *
     : : * Steps, in order: shut down the WAL receiver and the slot sync worker,
     : : * clear StandbyMode, re-read the last record to find the end of valid WAL,
     : : * leave archive recovery if it was requested (closing readFile if it is
     : : * open), and copy the valid part of the last WAL page into the result.
     : : *
1449 : : * This does not close the 'xlogreader' yet, because in some cases the caller
1450 : : * still wants to re-read the last checkpoint record by calling
1451 : : * ReadCheckpointRecord().
1452 : : *
1453 : : * Returns the position of the last valid or applied record, after which new
1454 : : * WAL should be appended, information about why recovery was ended, and some
1455 : : * other things. See the EndOfWalRecoveryInfo struct for details.
     : : *
     : : * The returned struct is allocated with palloc(). Its lastPage member is a
     : : * separate palloc() buffer holding the first (endOfLog % XLOG_BLCKSZ) bytes
     : : * of the last page, or NULL when endOfLog falls exactly on a page boundary.
1456 : : */
1457 : : EndOfWalRecoveryInfo *
1458 : 729 : FinishWalRecovery(void)
1459 : : {
1460 : 729 : EndOfWalRecoveryInfo *result = palloc(sizeof(EndOfWalRecoveryInfo));
1461 : : XLogRecPtr lastRec;
1462 : : TimeLineID lastRecTLI;
1463 : : XLogRecPtr endOfLog;
1464 : :
1465 : : /*
1466 : : * Kill WAL receiver, if it's still running, before we continue to write
1467 : : * the startup checkpoint and aborted-contrecord records. It would trample
1468 : : * over these records and subsequent ones if it were still alive when we
1469 : : * start writing WAL.
1470 : : */
1471 : 729 : XLogShutdownWalRcv();
1472 : :
1473 : : /*
1474 : : * Shutdown the slot sync worker to drop any temporary slots acquired by
1475 : : * it and to prevent it from continuing to try to fetch the failover slots.
1476 : : *
1477 : : * We do not update the 'synced' column in 'pg_replication_slots' system
1478 : : * view from true to false here, as any failed update could leave 'synced'
1479 : : * column false for some slots. This could cause issues during slot sync
1480 : : * after restarting the server as a standby. While updating the 'synced'
1481 : : * column after switching to the new timeline is an option, it does not
1482 : : * simplify the handling for the 'synced' column. Therefore, we retain the
1483 : : * 'synced' column as true after promotion as it may provide useful
1484 : : * information about the slot origin.
1485 : : */
52 akapila@postgresql.o 1486 :GNC 729 : ShutDownSlotSync();
1487 : :
1488 : : /*
1489 : : * We are now done reading the xlog from stream. Turn off streaming
1490 : : * recovery to force fetching the files (which would be required at end of
1491 : : * recovery, e.g., timeline history file) from archive or pg_wal.
1492 : : *
1493 : : * Note that standby mode must be turned off after killing WAL receiver,
1494 : : * i.e., calling XLogShutdownWalRcv().
1495 : : */
788 heikki.linnakangas@i 1496 [ - + ]:CBC 729 : Assert(!WalRcvStreaming());
1497 : 729 : StandbyMode = false;
1498 : :
1499 : : /*
1500 : : * Determine where to start writing WAL next.
1501 : : *
1502 : : * Re-fetch the last valid or last applied record, so we can identify the
1503 : : * exact endpoint of what we consider the valid portion of WAL. There may
1504 : : * be an incomplete continuation record after that, in which case
1505 : : * 'abortedRecPtr' and 'missingContrecPtr' are set and the caller will
1506 : : * write a special OVERWRITE_CONTRECORD message to mark that the rest of
1507 : : * it is intentionally missing. See CreateOverwriteContrecordRecord().
1508 : : *
1509 : : * An important side-effect of this is to load the last page into
1510 : : * xlogreader. The caller uses it to initialize the WAL for writing.
     : : *
     : : * If no recovery was performed (InRecovery is false) the record re-read
     : : * is the checkpoint record; otherwise it is the last replayed record.
     : : * The record pointer returned by ReadRecord() is deliberately discarded:
     : : * only the reader state (EndRecPtr, seg.ws_tli, readBuf) is used below.
1511 : : */
1512 [ + + ]: 729 : if (!InRecovery)
1513 : : {
1514 : 581 : lastRec = CheckPointLoc;
1515 : 581 : lastRecTLI = CheckPointTLI;
1516 : : }
1517 : : else
1518 : : {
1519 : 148 : lastRec = XLogRecoveryCtl->lastReplayedReadRecPtr;
1520 : 148 : lastRecTLI = XLogRecoveryCtl->lastReplayedTLI;
1521 : : }
738 tmunro@postgresql.or 1522 : 729 : XLogPrefetcherBeginRead(xlogprefetcher, lastRec);
1523 : 729 : (void) ReadRecord(xlogprefetcher, PANIC, false, lastRecTLI);
788 heikki.linnakangas@i 1524 : 729 : endOfLog = xlogreader->EndRecPtr;
1525 : :
1526 : : /*
1527 : : * Remember the TLI in the filename of the XLOG segment containing the
1528 : : * end-of-log. It could be different from the timeline that endOfLog
1529 : : * nominally belongs to, if there was a timeline switch in that segment,
1530 : : * and we were reading the old WAL from a segment belonging to a higher
1531 : : * timeline.
1532 : : */
1533 : 729 : result->endOfLogTLI = xlogreader->seg.ws_tli;
1534 : :
1535 [ + + ]: 729 : if (ArchiveRecoveryRequested)
1536 : : {
1537 : : /*
1538 : : * We are no longer in archive recovery state.
1539 : : *
1540 : : * We are now done reading the old WAL. Turn off archive fetching if
1541 : : * it was active.
1542 : : */
1543 [ - + ]: 46 : Assert(InArchiveRecovery);
1544 : 46 : InArchiveRecovery = false;
1545 : :
1546 : : /*
1547 : : * If the ending log segment is still open, close it (to avoid
1548 : : * problems on Windows with trying to rename or delete an open file).
1549 : : */
1550 [ + - ]: 46 : if (readFile >= 0)
1551 : : {
1552 : 46 : close(readFile);
1553 : 46 : readFile = -1;
1554 : : }
1555 : : }
1556 : :
1557 : : /*
1558 : : * Copy the last partial block to the caller, for initializing the WAL
1559 : : * buffer for appending new WAL.
1560 : : */
1561 [ + + ]: 729 : if (endOfLog % XLOG_BLCKSZ != 0)
1562 : : {
1563 : : char *page;
1564 : : int len;
1565 : : XLogRecPtr pageBeginPtr;
1566 : :
1567 : 713 : pageBeginPtr = endOfLog - (endOfLog % XLOG_BLCKSZ);
1568 [ - + ]: 713 : Assert(readOff == XLogSegmentOffset(pageBeginPtr, wal_segment_size));
1569 : :
1570 : : /* Copy the valid part of the last block: the first len bytes of readBuf */
1571 : 713 : len = endOfLog % XLOG_BLCKSZ;
1572 : 713 : page = palloc(len);
1573 : 713 : memcpy(page, xlogreader->readBuf, len);
1574 : :
1575 : 713 : result->lastPageBeginPtr = pageBeginPtr;
1576 : 713 : result->lastPage = page;
1577 : : }
1578 : : else
1579 : : {
1580 : : /* There is no partial block to copy. */
1581 : 16 : result->lastPageBeginPtr = endOfLog;
1582 : 16 : result->lastPage = NULL;
1583 : : }
1584 : :
1585 : : /*
1586 : : * Create a comment for the history file to explain why and where timeline
1587 : : * changed.
1588 : : */
1589 : 729 : result->recoveryStopReason = getRecoveryStopReason();
1590 : :
1591 : 729 : result->lastRec = lastRec;
1592 : 729 : result->lastRecTLI = lastRecTLI;
1593 : 729 : result->endOfLog = endOfLog;
1594 : :
1595 : 729 : result->abortedRecPtr = abortedRecPtr;
1596 : 729 : result->missingContrecPtr = missingContrecPtr;
1597 : :
1598 : 729 : result->standby_signal_file_found = standby_signal_file_found;
1599 : 729 : result->recovery_signal_file_found = recovery_signal_file_found;
1600 : :
1601 : 729 : return result;
1602 : : }
1603 : :
1604 : : /*
1605 : : * Clean up the WAL reader and leftovers from restoring WAL from archive
     : : *
     : : * Updates the prefetch statistics one last time, closes readFile if it is
     : : * still open, and frees both xlogreader and xlogprefetcher. When archive
     : : * recovery was requested, it also unlinks the RECOVERYXLOG and
     : : * RECOVERYHISTORY files under XLOGDIR (unlink errors are ignored) and
     : : * disowns the recovery wakeup latch.
1606 : : */
1607 : : void
1608 : 729 : ShutdownWalRecovery(void)
1609 : : {
1610 : : char recoveryPath[MAXPGPATH];
1611 : :
1612 : : /* Final update of pg_stat_recovery_prefetch. */
738 tmunro@postgresql.or 1613 : 729 : XLogPrefetcherComputeStats(xlogprefetcher);
1614 : :
1615 : : /* Shut down xlogreader: close the open segment file first, then free it */
788 heikki.linnakangas@i 1616 [ + + ]: 729 : if (readFile >= 0)
1617 : : {
1618 : 683 : close(readFile);
1619 : 683 : readFile = -1;
1620 : : }
1621 : 729 : XLogReaderFree(xlogreader);
738 tmunro@postgresql.or 1622 : 729 : XLogPrefetcherFree(xlogprefetcher);
1623 : :
788 heikki.linnakangas@i 1624 [ + + ]: 729 : if (ArchiveRecoveryRequested)
1625 : : {
1626 : : /*
1627 : : * Since there might be a partial WAL segment named RECOVERYXLOG, get
1628 : : * rid of it.
1629 : : */
1630 : 46 : snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYXLOG");
1631 : 46 : unlink(recoveryPath); /* ignore any error */
1632 : :
1633 : : /* Get rid of any remaining recovered timeline-history file, too */
1634 : 46 : snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYHISTORY");
1635 : 46 : unlink(recoveryPath); /* ignore any error */
1636 : : }
1637 : :
1638 : : /*
1639 : : * We don't need the latch anymore. It's not strictly necessary to disown
1640 : : * it, but let's do it for the sake of tidiness.
     : : *
     : : * The latch is only disowned when archive recovery was requested.
1641 : : */
1642 [ + + ]: 729 : if (ArchiveRecoveryRequested)
1643 : 46 : DisownLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
1644 : 729 : }
1645 : :
1646 : : /*
1647 : : * Perform WAL recovery.
1648 : : *
1649 : : * If the system was shut down cleanly, this is never called.
     : : *
     : : * Outline:
     : : *   1. Under info_lck, initialize the shared replay-progress fields.
     : : *   2. Set XLogReceiptTime, signal the postmaster that recovery started
     : : *      (only if IsUnderPostmaster), and run CheckRecoveryConsistency().
     : : *   3. Read the first record to replay: the record at RedoStartLSN when
     : : *      that precedes the checkpoint (it must be XLOG_CHECKPOINT_REDO,
     : : *      else FATAL), otherwise the record following the checkpoint.
     : : *   4. If a record was found, run the redo loop until ReadRecord() returns
     : : *      NULL or a recovery target is reached, then act on
     : : *      recoveryTargetAction and call RmgrCleanup().
     : : *   5. Raise FATAL if archive recovery had a recovery target configured
     : : *      that was never reached.
1650 : : */
1651 : : void
1652 : 242 : PerformWalRecovery(void)
1653 : : {
1654 : : XLogRecord *record;
1655 : 242 : bool reachedRecoveryTarget = false;
1656 : : TimeLineID replayTLI;
1657 : :
1658 : : /*
1659 : : * Initialize shared variables for tracking progress of WAL replay, as if
1660 : : * we had just replayed the record before the REDO location (or the
1661 : : * checkpoint record itself, if it's a shutdown checkpoint).
1662 : : */
1663 [ - + ]: 242 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
1664 [ + + ]: 242 : if (RedoStartLSN < CheckPointLoc)
1665 : : {
1666 : 143 : XLogRecoveryCtl->lastReplayedReadRecPtr = InvalidXLogRecPtr;
1667 : 143 : XLogRecoveryCtl->lastReplayedEndRecPtr = RedoStartLSN;
1668 : 143 : XLogRecoveryCtl->lastReplayedTLI = RedoStartTLI;
1669 : : }
1670 : : else
1671 : : {
1672 : 99 : XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
1673 : 99 : XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
1674 : 99 : XLogRecoveryCtl->lastReplayedTLI = CheckPointTLI;
1675 : : }
1676 : 242 : XLogRecoveryCtl->replayEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
1677 : 242 : XLogRecoveryCtl->replayEndTLI = XLogRecoveryCtl->lastReplayedTLI;
1678 : 242 : XLogRecoveryCtl->recoveryLastXTime = 0;
1679 : 242 : XLogRecoveryCtl->currentChunkStartTime = 0;
1680 : 242 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
1681 : 242 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
1682 : :
1683 : : /* Also ensure XLogReceiptTime has a sane value */
1684 : 242 : XLogReceiptTime = GetCurrentTimestamp();
1685 : :
1686 : : /*
1687 : : * Let postmaster know we've started redo now, so that it can launch the
1688 : : * archiver if necessary.
1689 : : */
1690 [ + + ]: 242 : if (IsUnderPostmaster)
1691 : 233 : SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
1692 : :
1693 : : /*
1694 : : * Allow read-only connections immediately if we're consistent already.
1695 : : */
1696 : 242 : CheckRecoveryConsistency();
1697 : :
1698 : : /*
1699 : : * Find the first record that logically follows the checkpoint --- it
1700 : : * might physically precede it, though.
1701 : : */
1702 [ + + ]: 242 : if (RedoStartLSN < CheckPointLoc)
1703 : : {
1704 : : /* back up to find the record */
1705 : 143 : replayTLI = RedoStartTLI;
738 tmunro@postgresql.or 1706 : 143 : XLogPrefetcherBeginRead(xlogprefetcher, RedoStartLSN);
1707 : 143 : record = ReadRecord(xlogprefetcher, PANIC, false, replayTLI);
1708 : :
1709 : : /*
1710 : : * If a checkpoint record's redo pointer points back to an earlier
1711 : : * LSN, the record at that LSN should be an XLOG_CHECKPOINT_REDO
1712 : : * record.
1713 : : */
178 rhaas@postgresql.org 1714 [ + - ]:GNC 143 : if (record->xl_rmid != RM_XLOG_ID ||
1715 [ - + ]: 143 : (record->xl_info & ~XLR_INFO_MASK) != XLOG_CHECKPOINT_REDO)
178 rhaas@postgresql.org 1716 [ # # ]:UNC 0 : ereport(FATAL,
1717 : : (errmsg("unexpected record type found at redo point %X/%X",
1718 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
1719 : : }
1720 : : else
1721 : : {
1722 : : /* just have to read next record after CheckPoint */
788 heikki.linnakangas@i 1723 [ - + ]:CBC 99 : Assert(xlogreader->ReadRecPtr == CheckPointLoc);
1724 : 99 : replayTLI = CheckPointTLI;
738 tmunro@postgresql.or 1725 : 99 : record = ReadRecord(xlogprefetcher, LOG, false, replayTLI);
1726 : : }
1727 : :
788 heikki.linnakangas@i 1728 [ + + ]: 241 : if (record != NULL)
1729 : : {
1730 : : TimestampTz xtime;
1731 : : PGRUsage ru0;
1732 : :
1733 : 232 : pg_rusage_init(&ru0);
1734 : :
1735 : 232 : InRedo = true;
1736 : :
739 jdavis@postgresql.or 1737 : 232 : RmgrStartup();
1738 : :
788 heikki.linnakangas@i 1739 [ + - ]: 232 : ereport(LOG,
1740 : : (errmsg("redo starts at %X/%X",
1741 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
1742 : :
1743 : : /* Prepare to report progress of the redo phase (not in standby mode). */
1744 [ + + ]: 232 : if (!StandbyMode)
1745 : 105 : begin_startup_progress_phase();
1746 : :
1747 : : /*
1748 : : * main redo apply loop
     : : *
     : : * Each iteration handles exactly one record: progress report,
     : : * interrupt and pause checks, stop-before-target check, optional
     : : * apply delay, the apply itself, stop-after-target check, and
     : : * finally the read of the next record.
1749 : : */
1750 : : do
1751 : : {
1752 [ + + ]: 2982028 : if (!StandbyMode)
1753 [ - + - - ]: 270868 : ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
1754 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
1755 : :
1756 : : #ifdef WAL_DEBUG
1757 : : if (XLOG_DEBUG)
1758 : : {
1759 : : StringInfoData buf;
1760 : :
1761 : : initStringInfo(&buf);
1762 : : appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
1763 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
1764 : : LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
1765 : : xlog_outrec(&buf, xlogreader);
1766 : : appendStringInfoString(&buf, " - ");
1767 : : xlog_outdesc(&buf, xlogreader);
1768 : : elog(LOG, "%s", buf.data);
1769 : : pfree(buf.data);
1770 : : }
1771 : : #endif
1772 : :
1773 : : /* Handle interrupt signals of startup process */
1774 : 2982028 : HandleStartupProcInterrupts();
1775 : :
1776 : : /*
1777 : : * Pause WAL replay, if requested by a hot-standby session via
1778 : : * SetRecoveryPause().
1779 : : *
1780 : : * Note that we intentionally don't take the info_lck spinlock
1781 : : * here. We might therefore read a slightly stale value of the
1782 : : * recoveryPause flag, but it can't be very stale (no worse than
1783 : : * the last spinlock we did acquire). Since a pause request is a
1784 : : * pretty asynchronous thing anyway, possibly responding to it one
1785 : : * WAL record later than we otherwise would is a minor issue, so
1786 : : * it doesn't seem worth adding another spinlock cycle to prevent
1787 : : * that.
1788 : : */
1789 [ - + ]: 2982028 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
1790 : : RECOVERY_NOT_PAUSED)
788 heikki.linnakangas@i 1791 :UBC 0 : recoveryPausesHere(false);
1792 : :
1793 : : /*
1794 : : * Have we reached our recovery target?
     : : * If so, stop without applying the current record.
1795 : : */
788 heikki.linnakangas@i 1796 [ + + ]:CBC 2982028 : if (recoveryStopsBefore(xlogreader))
1797 : : {
1798 : 4 : reachedRecoveryTarget = true;
1799 : 4 : break;
1800 : : }
1801 : :
1802 : : /*
1803 : : * If we've been asked to lag the primary, wait on latch until
1804 : : * enough time has passed.
1805 : : */
1806 [ + + ]: 2982024 : if (recoveryApplyDelay(xlogreader))
1807 : : {
1808 : : /*
1809 : : * We test for paused recovery again here. If user sets
1810 : : * delayed apply, it may be because they expect to pause
1811 : : * recovery in case of problems, so we must test again here
1812 : : * otherwise pausing during the delay-wait wouldn't work.
1813 : : */
1814 [ - + ]: 4 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
1815 : : RECOVERY_NOT_PAUSED)
788 heikki.linnakangas@i 1816 :UBC 0 : recoveryPausesHere(false);
1817 : : }
1818 : :
1819 : : /*
1820 : : * Apply the record
     : : * (replayTLI may be updated if the record switches timelines)
1821 : : */
788 heikki.linnakangas@i 1822 :CBC 2982024 : ApplyWalRecord(xlogreader, record, &replayTLI);
1823 : :
1824 : : /* Exit loop if we reached inclusive recovery target */
1825 [ + + ]: 2982020 : if (recoveryStopsAfter(xlogreader))
1826 : : {
1827 : 8 : reachedRecoveryTarget = true;
1828 : 8 : break;
1829 : : }
1830 : :
1831 : : /* Else, try to fetch the next WAL record */
738 tmunro@postgresql.or 1832 : 2982012 : record = ReadRecord(xlogprefetcher, LOG, false, replayTLI);
788 heikki.linnakangas@i 1833 [ + + ]: 2981930 : } while (record != NULL);
1834 : :
1835 : : /*
1836 : : * end of main redo apply loop
1837 : : */
1838 : :
1839 [ + + ]: 146 : if (reachedRecoveryTarget)
1840 : : {
1841 [ - + ]: 12 : if (!reachedConsistency)
788 heikki.linnakangas@i 1842 [ # # ]:UBC 0 : ereport(FATAL,
1843 : : (errmsg("requested recovery stop point is before consistent recovery point")));
1844 : :
1845 : : /*
1846 : : * This is the last point where we can restart recovery with a new
1847 : : * recovery target, if we shutdown and begin again. After this,
1848 : : * Resource Managers may choose to do permanent corrective actions
1849 : : * at end of recovery.
1850 : : */
788 heikki.linnakangas@i 1851 [ - + + - ]:CBC 12 : switch (recoveryTargetAction)
1852 : : {
788 heikki.linnakangas@i 1853 :UBC 0 : case RECOVERY_TARGET_ACTION_SHUTDOWN:
1854 : :
1855 : : /*
1856 : : * exit with special return code to request shutdown of
1857 : : * postmaster. Log messages issued from postmaster.
1858 : : */
1859 : 0 : proc_exit(3);
1860 : :
788 heikki.linnakangas@i 1861 :CBC 6 : case RECOVERY_TARGET_ACTION_PAUSE:
1862 : 6 : SetRecoveryPause(true);
1863 : 6 : recoveryPausesHere(true);
1864 : :
1865 : : /* drop into promote (intentional fall through) */
1866 : :
1867 : 6 : case RECOVERY_TARGET_ACTION_PROMOTE:
1868 : 6 : break;
1869 : : }
1870 : : }
1871 : :
739 jdavis@postgresql.or 1872 : 140 : RmgrCleanup();
1873 : :
788 heikki.linnakangas@i 1874 [ + - ]: 140 : ereport(LOG,
1875 : : (errmsg("redo done at %X/%X system usage: %s",
1876 : : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
1877 : : pg_rusage_show(&ru0))));
1878 : 140 : xtime = GetLatestXTime();
1879 [ + + ]: 140 : if (xtime)
1880 [ + - ]: 33 : ereport(LOG,
1881 : : (errmsg("last completed transaction was at log time %s",
1882 : : timestamptz_to_str(xtime))));
1883 : :
1884 : 140 : InRedo = false;
1885 : : }
1886 : : else
1887 : : {
1888 : : /* there are no WAL records following the checkpoint */
1889 [ + - ]: 9 : ereport(LOG,
1890 : : (errmsg("redo is not required")));
1891 : : }
1892 : :
1893 : : /*
1894 : : * This check is intentionally after the above log messages that indicate
1895 : : * how far recovery went.
     : : *
     : : * It fires only when archive recovery was requested with a recovery
     : : * target set and replay ran out of WAL before reaching that target.
1896 : : */
1897 [ + + ]: 149 : if (ArchiveRecoveryRequested &&
1898 [ + + ]: 47 : recoveryTarget != RECOVERY_TARGET_UNSET &&
1899 [ + + ]: 7 : !reachedRecoveryTarget)
1900 [ + - ]: 1 : ereport(FATAL,
1901 : : (errmsg("recovery ended before configured recovery target was reached")));
1902 : 148 : }
1903 : :
1904 : : /*
1905 : : * Subroutine of PerformWalRecovery, to apply one WAL record.
     : : *
     : : * xlogreader: reader state positioned on the record to apply; its
     : : *     ReadRecPtr and EndRecPtr are published as replay progress.
     : : * record: header of that record (xl_rmid, xl_info and xl_xid are used).
     : : * replayTLI: in/out. Timeline being replayed; overwritten with the new
     : : *     timeline when a shutdown checkpoint or end-of-recovery record
     : : *     names a different one.
     : : *
     : : * While the redo routine runs, rm_redo_error_callback is installed on
     : : * error_context_stack; it is popped again before the function returns.
1906 : : */
1907 : : static void
1908 : 2982024 : ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI)
1909 : : {
1910 : : ErrorContextCallback errcallback;
1911 : 2982024 : bool switchedTLI = false;
1912 : :
1913 : : /* Setup error traceback support for ereport() */
1914 : 2982024 : errcallback.callback = rm_redo_error_callback;
1915 : 2982024 : errcallback.arg = (void *) xlogreader;
1916 : 2982024 : errcallback.previous = error_context_stack;
1917 : 2982024 : error_context_stack = &errcallback;
1918 : :
1919 : : /*
1920 : : * TransamVariables->nextXid must be beyond record's xid.
1921 : : */
1922 : 2982024 : AdvanceNextFullTransactionIdPastXid(record->xl_xid);
1923 : :
1924 : : /*
1925 : : * Before replaying this record, check if this record causes the current
1926 : : * timeline to change. The record is already considered to be part of the
1927 : : * new timeline, so we update replayTLI before replaying it. That's
1928 : : * important so that replayEndTLI, which is recorded as the minimum
1929 : : * recovery point's TLI if recovery stops after this record, is set
1930 : : * correctly.
1931 : : */
1932 [ + + ]: 2982024 : if (record->xl_rmid == RM_XLOG_ID)
1933 : : {
1934 : 42783 : TimeLineID newReplayTLI = *replayTLI;
1935 : 42783 : TimeLineID prevReplayTLI = *replayTLI;
1936 : 42783 : uint8 info = record->xl_info & ~XLR_INFO_MASK;
1937 : :
1938 [ + + ]: 42783 : if (info == XLOG_CHECKPOINT_SHUTDOWN)
1939 : : {
1940 : : CheckPoint checkPoint;
1941 : :
1942 : 46 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
1943 : 46 : newReplayTLI = checkPoint.ThisTimeLineID;
1944 : 46 : prevReplayTLI = checkPoint.PrevTimeLineID;
1945 : : }
1946 [ + + ]: 42737 : else if (info == XLOG_END_OF_RECOVERY)
1947 : : {
1948 : : xl_end_of_recovery xlrec;
1949 : :
1950 : 25 : memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
1951 : 25 : newReplayTLI = xlrec.ThisTimeLineID;
1952 : 25 : prevReplayTLI = xlrec.PrevTimeLineID;
1953 : : }
1954 : :
1955 [ + + ]: 42783 : if (newReplayTLI != *replayTLI)
1956 : : {
1957 : : /* Check that it's OK to switch to this TLI */
1958 : 26 : checkTimeLineSwitch(xlogreader->EndRecPtr,
1959 : : newReplayTLI, prevReplayTLI, *replayTLI);
1960 : :
1961 : : /* Following WAL records should be run with new TLI */
1962 : 26 : *replayTLI = newReplayTLI;
1963 : 26 : switchedTLI = true;
1964 : : }
1965 : : }
1966 : :
1967 : : /*
1968 : : * Update shared replayEndRecPtr before replaying this record, so that
1969 : : * XLogFlush will update minRecoveryPoint correctly.
1970 : : */
1971 [ + + ]: 2982024 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
1972 : 2982024 : XLogRecoveryCtl->replayEndRecPtr = xlogreader->EndRecPtr;
1973 : 2982024 : XLogRecoveryCtl->replayEndTLI = *replayTLI;
1974 : 2982024 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
1975 : :
1976 : : /*
1977 : : * If we are attempting to enter Hot Standby mode, process XIDs we see
1978 : : */
1979 [ + + ]: 2982024 : if (standbyState >= STANDBY_INITIALIZED &&
1980 [ + + ]: 2734809 : TransactionIdIsValid(record->xl_xid))
1981 : 2683162 : RecordKnownAssignedTransactionIds(record->xl_xid);
1982 : :
1983 : : /*
1984 : : * Some XLOG record types that are related to recovery are processed
1985 : : * directly here, rather than in xlog_redo()
     : : *
     : : * Note that this runs in addition to, not instead of, the regular
     : : * rm_redo call just below.
1986 : : */
1987 [ + + ]: 2982024 : if (record->xl_rmid == RM_XLOG_ID)
1988 : 42783 : xlogrecovery_redo(xlogreader, *replayTLI);
1989 : :
1990 : : /* Now apply the WAL record itself */
739 jdavis@postgresql.or 1991 : 2982024 : GetRmgr(record->xl_rmid).rm_redo(xlogreader);
1992 : :
1993 : : /*
1994 : : * After redo, check whether the backup pages associated with the WAL
1995 : : * record are consistent with the existing pages. This check is done only
1996 : : * if consistency check is enabled for this record.
1997 : : */
788 heikki.linnakangas@i 1998 [ + + ]: 2982020 : if ((record->xl_info & XLR_CHECK_CONSISTENCY) != 0)
1999 : 3225 : verifyBackupPageConsistency(xlogreader);
2000 : :
2001 : : /* Pop the error context stack */
2002 : 2982020 : error_context_stack = errcallback.previous;
2003 : :
2004 : : /*
2005 : : * Update lastReplayedEndRecPtr after this record has been successfully
2006 : : * replayed.
2007 : : */
2008 [ + + ]: 2982020 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
2009 : 2982020 : XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
2010 : 2982020 : XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
2011 : 2982020 : XLogRecoveryCtl->lastReplayedTLI = *replayTLI;
2012 : 2982020 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
2013 : :
2014 : : /* ------
2015 : : * Wakeup walsenders:
2016 : : *
2017 : : * On the standby, the WAL is flushed first (which will only wake up
2018 : : * physical walsenders) and then applied, which will only wake up logical
2019 : : * walsenders.
2020 : : *
2021 : : * Indeed, logical walsenders on standby can't decode and send data until
2022 : : * it's been applied.
2023 : : *
2024 : : * Physical walsenders don't need to be woken up during replay unless
2025 : : * cascading replication is allowed and time line change occurred (so that
2026 : : * they can notice that they are on a new time line).
2027 : : *
2028 : : * That's why the wake up conditions are for:
2029 : : *
2030 : : * - physical walsenders in case of new time line and cascade
2031 : : * replication is allowed
2032 : : * - logical walsenders in case cascade replication is allowed (could not
2033 : : * be created otherwise)
2034 : : * ------
2035 : : */
372 andres@anarazel.de 2036 [ + + + + ]: 2982020 : if (AllowCascadeReplication())
2037 : 2793003 : WalSndWakeup(switchedTLI, true);
2038 : :
2039 : : /*
2040 : : * If rm_redo called XLogRequestWalReceiverReply, then we wake up the
2041 : : * receiver so that it notices the updated lastReplayedEndRecPtr and sends
2042 : : * a reply to the primary.
2043 : : */
788 heikki.linnakangas@i 2044 [ - + ]: 2982020 : if (doRequestWalReceiverReply)
2045 : : {
788 heikki.linnakangas@i 2046 :UBC 0 : doRequestWalReceiverReply = false;
2047 : 0 : WalRcvForceReply();
2048 : : }
2049 : :
2050 : : /* Allow read-only connections if we're consistent now */
788 heikki.linnakangas@i 2051 :CBC 2982020 : CheckRecoveryConsistency();
2052 : :
2053 : : /* Is this a timeline switch? */
2054 [ + + ]: 2982020 : if (switchedTLI)
2055 : : {
2056 : : /*
2057 : : * Before we continue on the new timeline, clean up any (possibly
2058 : : * bogus) future WAL segments on the old timeline.
2059 : : */
2060 : 26 : RemoveNonParentXlogFiles(xlogreader->EndRecPtr, *replayTLI);
2061 : :
2062 : : /* Reset the prefetcher. */
738 tmunro@postgresql.or 2063 : 26 : XLogPrefetchReconfigure();
2064 : : }
788 heikki.linnakangas@i 2065 : 2982020 : }
2066 : :
2067 : : /*
2068 : : * Some XLOG RM record types that are directly related to WAL recovery are
2069 : : * handled here rather than in xlog_redo()
     : : *
     : : * Only two info values are acted upon:
     : : *   XLOG_OVERWRITE_CONTRECORD: verify the overwritten LSN against the
     : : *     reader state (FATAL on mismatch), then clear abortedRecPtr,
     : : *     missingContrecPtr and the reader overwrittenRecPtr.
     : : *   XLOG_BACKUP_END: if the recorded start point matches
     : : *     backupStartPoint, set backupEndPoint to the end of this record.
     : : * Every other info value is ignored here.
     : : *
     : : * record: reader state holding the decoded record (must be RM_XLOG_ID).
     : : * replayTLI: not referenced by the current body.
2070 : : */
2071 : : static void
2072 : 42783 : xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
2073 : : {
2074 : 42783 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2075 : 42783 : XLogRecPtr lsn = record->EndRecPtr;
2076 : :
2077 [ - + ]: 42783 : Assert(XLogRecGetRmid(record) == RM_XLOG_ID);
2078 : :
2079 [ + + ]: 42783 : if (info == XLOG_OVERWRITE_CONTRECORD)
2080 : : {
2081 : : /* Verify the payload of a XLOG_OVERWRITE_CONTRECORD record. */
2082 : : xl_overwrite_contrecord xlrec;
2083 : :
2084 : 1 : memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_overwrite_contrecord));
2085 [ - + ]: 1 : if (xlrec.overwritten_lsn != record->overwrittenRecPtr)
788 heikki.linnakangas@i 2086 [ # # ]:UBC 0 : elog(FATAL, "mismatching overwritten LSN %X/%X -> %X/%X",
2087 : : LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
2088 : : LSN_FORMAT_ARGS(record->overwrittenRecPtr));
2089 : :
2090 : : /* We have safely skipped the aborted record */
753 alvherre@alvh.no-ip. 2091 :CBC 1 : abortedRecPtr = InvalidXLogRecPtr;
2092 : 1 : missingContrecPtr = InvalidXLogRecPtr;
2093 : :
788 heikki.linnakangas@i 2094 [ + - ]: 1 : ereport(LOG,
2095 : : (errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s",
2096 : : LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
2097 : : timestamptz_to_str(xlrec.overwrite_time))));
2098 : :
2099 : : /* Verifying the record should only happen once */
2100 : 1 : record->overwrittenRecPtr = InvalidXLogRecPtr;
2101 : : }
2102 [ + + ]: 42782 : else if (info == XLOG_BACKUP_END)
2103 : : {
2104 : : XLogRecPtr startpoint;
2105 : :
2106 : 111 : memcpy(&startpoint, XLogRecGetData(record), sizeof(startpoint));
2107 : :
2108 [ + + ]: 111 : if (backupStartPoint == startpoint)
2109 : : {
2110 : : /*
2111 : : * We have reached the end of base backup, the point where
2112 : : * pg_backup_stop() was done. The data on disk is now consistent
2113 : : * (assuming we have also reached minRecoveryPoint). Set
2114 : : * backupEndPoint to the current LSN, so that the next call to
2115 : : * CheckRecoveryConsistency() will notice it and do the
2116 : : * end-of-backup processing.
2117 : : */
2118 [ + + ]: 88 : elog(DEBUG1, "end of backup record reached");
2119 : :
2120 : 88 : backupEndPoint = lsn;
2121 : : }
2122 : : else
2123 [ + + ]: 23 : elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%X, waiting for %X/%X",
2124 : : LSN_FORMAT_ARGS(startpoint), LSN_FORMAT_ARGS(backupStartPoint));
2125 : : }
2126 : 42783 : }
2127 : :
2128 : : /*
2129 : : * Verify that, in non-test mode, ./pg_tblspc doesn't contain any real
2130 : : * directories.
2131 : : *
2132 : : * Replay of database creation XLOG records for databases that were later
2133 : : * dropped can create fake directories in pg_tblspc. By the time consistency
2134 : : * is reached these directories should have been removed; here we verify
2135 : : * that this did indeed happen. This is to be called at the point where
2136 : : * consistent state is reached.
2137 : : *
2138 : : * allow_in_place_tablespaces turns the PANIC into a WARNING, which is
2139 : : * useful for testing purposes, and also allows for an escape hatch in case
2140 : : * things go south.
     : : *
     : : * Only entries whose names consist solely of decimal digits are examined;
     : : * each such entry is reported unless it is a symbolic link. The directory
     : : * handle is released with FreeDir() once the scan is complete.
2141 : : */
2142 : : static void
626 alvherre@alvh.no-ip. 2143 : 145 : CheckTablespaceDirectory(void)
2144 : : {
2145 : : DIR *dir;
2146 : : struct dirent *de;
2147 : :
2148 : 145 : dir = AllocateDir("pg_tblspc");
2149 [ + + ]: 458 : while ((de = ReadDir(dir, "pg_tblspc")) != NULL)
2150 : : {
2151 : : char path[MAXPGPATH + 10];
2152 : :
2153 : : /* Skip entries of non-oid names */
2154 [ + + ]: 313 : if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
2155 : 290 : continue;
2156 : :
2157 : 23 : snprintf(path, sizeof(path), "pg_tblspc/%s", de->d_name);
2158 : :
2159 [ + + ]: 23 : if (get_dirent_type(path, de, false, ERROR) != PGFILETYPE_LNK)
2160 [ + - + - ]: 22 : ereport(allow_in_place_tablespaces ? WARNING : PANIC,
2161 : : (errcode(ERRCODE_DATA_CORRUPTED),
2162 : : errmsg("unexpected directory entry \"%s\" found in %s",
2163 : : de->d_name, "pg_tblspc/"),
2164 : : errdetail("All directory entries in pg_tblspc/ should be symbolic links."),
2165 : : errhint("Remove those directories, or set allow_in_place_tablespaces to ON transiently to let recovery complete.")));
2166 : : }
     : :
     : : /* Scan finished: release the directory handle obtained from AllocateDir(). */
     : : FreeDir(dir);
2167 : 145 : }
2168 : :
2169 : : /*
2170 : : * Checks if recovery has reached a consistent state. When consistency is
2171 : : * reached and we have a valid starting standby snapshot, tell postmaster
2172 : : * that it can start accepting read-only connections.
     : : *
     : : * Returns immediately when minRecoveryPoint is invalid. Otherwise it runs
     : : * up to three independent checks against the last replayed position:
     : : *   1. end of base backup reached: update the control file through
     : : *      ReachedEndOfBackup() and clear the backup start and end points;
     : : *   2. minRecoveryPoint passed with no backup end still required: run the
     : : *      invalid-page and tablespace-directory checks and set
     : : *      reachedConsistency;
     : : *   3. standby snapshot ready and consistency reached: mark hot standby
     : : *      active and signal the postmaster (only if IsUnderPostmaster).
2173 : : */
2174 : : static void
788 heikki.linnakangas@i 2175 : 2982266 : CheckRecoveryConsistency(void)
2176 : : {
2177 : : XLogRecPtr lastReplayedEndRecPtr;
2178 : : TimeLineID lastReplayedTLI;
2179 : :
2180 : : /*
2181 : : * During crash recovery, we don't reach a consistent state until we've
2182 : : * replayed all the WAL.
2183 : : */
2184 [ + + ]: 2982266 : if (XLogRecPtrIsInvalid(minRecoveryPoint))
2185 : 265756 : return;
2186 : :
2187 [ - + ]: 2716510 : Assert(InArchiveRecovery);
2188 : :
2189 : : /*
2190 : : * assume that we are called in the startup process, and hence don't need
2191 : : * a lock to read lastReplayedEndRecPtr
2192 : : */
2193 : 2716510 : lastReplayedEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
2194 : 2716510 : lastReplayedTLI = XLogRecoveryCtl->lastReplayedTLI;
2195 : :
2196 : : /*
2197 : : * Have we reached the point where our base backup was completed?
2198 : : */
2199 [ + + ]: 2716510 : if (!XLogRecPtrIsInvalid(backupEndPoint) &&
2200 [ + + ]: 1603 : backupEndPoint <= lastReplayedEndRecPtr)
2201 : : {
     : : /* Saved copies are needed for the log message after the reset below. */
80 michael@paquier.xyz 2202 : 95 : XLogRecPtr saveBackupStartPoint = backupStartPoint;
2203 : 95 : XLogRecPtr saveBackupEndPoint = backupEndPoint;
2204 : :
788 heikki.linnakangas@i 2205 [ + + ]: 95 : elog(DEBUG1, "end of backup reached");
2206 : :
2207 : : /*
2208 : : * We have reached the end of base backup, as indicated by pg_control.
2209 : : * Update the control file accordingly.
2210 : : */
2211 : 95 : ReachedEndOfBackup(lastReplayedEndRecPtr, lastReplayedTLI);
2212 : 95 : backupStartPoint = InvalidXLogRecPtr;
2213 : 95 : backupEndPoint = InvalidXLogRecPtr;
2214 : 95 : backupEndRequired = false;
2215 : :
80 michael@paquier.xyz 2216 [ + - ]: 95 : ereport(LOG,
2217 : : (errmsg("completed backup recovery with redo LSN %X/%X and end LSN %X/%X",
2218 : : LSN_FORMAT_ARGS(saveBackupStartPoint),
2219 : : LSN_FORMAT_ARGS(saveBackupEndPoint))));
2220 : : }
2221 : :
2222 : : /*
2223 : : * Have we passed our safe starting point? Note that minRecoveryPoint is
2224 : : * known to be incorrectly set if recovering from a backup, until the
2225 : : * XLOG_BACKUP_END arrives to advise us of the correct minRecoveryPoint.
2226 : : * All we know prior to that is that we're not consistent yet.
2227 : : */
788 heikki.linnakangas@i 2228 [ + + + + ]: 2716510 : if (!reachedConsistency && !backupEndRequired &&
2229 [ + + ]: 28879 : minRecoveryPoint <= lastReplayedEndRecPtr)
2230 : : {
2231 : : /*
2232 : : * Check to see if the XLOG sequence contained any unresolved
2233 : : * references to uninitialized pages.
2234 : : */
2235 : 145 : XLogCheckInvalidPages();
2236 : :
2237 : : /*
2238 : : * Check that pg_tblspc doesn't contain any real directories. Replay
2239 : : * of Database/CREATE_* records may have created fictitious tablespace
2240 : : * directories that should have been removed by the time consistency
2241 : : * was reached.
2242 : : */
626 alvherre@alvh.no-ip. 2243 : 145 : CheckTablespaceDirectory();
2244 : :
788 heikki.linnakangas@i 2245 : 145 : reachedConsistency = true;
2246 [ + - ]: 145 : ereport(LOG,
2247 : : (errmsg("consistent recovery state reached at %X/%X",
2248 : : LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
2249 : : }
2250 : :
2251 : : /*
2252 : : * Have we got a valid starting snapshot that will allow queries to be
2253 : : * run? If so, we can tell postmaster that the database is consistent now,
2254 : : * enabling connections.
     : : *
     : : * LocalHotStandbyActive keeps this block from running more than once.
2255 : : */
2256 [ + + ]: 2716510 : if (standbyState == STANDBY_SNAPSHOT_READY &&
2257 [ + + + + ]: 2716232 : !LocalHotStandbyActive &&
2258 [ + - ]: 138 : reachedConsistency &&
2259 : : IsUnderPostmaster)
2260 : : {
2261 [ - + ]: 138 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
2262 : 138 : XLogRecoveryCtl->SharedHotStandbyActive = true;
2263 : 138 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
2264 : :
2265 : 138 : LocalHotStandbyActive = true;
2266 : :
2267 : 138 : SendPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY);
2268 : : }
2269 : : }
2270 : :
2271 : : /*
2272 : : * Error context callback for errors occurring during rm_redo().
2273 : : */
2274 : : static void
2275 : 147 : rm_redo_error_callback(void *arg)
2276 : : {
2277 : 147 : XLogReaderState *record = (XLogReaderState *) arg;
2278 : : StringInfoData buf;
2279 : :
2280 : 147 : initStringInfo(&buf);
2281 : 147 : xlog_outdesc(&buf, record);
2282 : 147 : xlog_block_info(&buf, record);
2283 : :
2284 : : /* translator: %s is a WAL record description */
2285 : 147 : errcontext("WAL redo at %X/%X for %s",
2286 : 147 : LSN_FORMAT_ARGS(record->ReadRecPtr),
2287 : : buf.data);
2288 : :
2289 : 147 : pfree(buf.data);
2290 : 147 : }
2291 : :
2292 : : /*
2293 : : * Returns a string describing an XLogRecord, consisting of its identity
2294 : : * optionally followed by a colon, a space, and a further description.
2295 : : */
2296 : : void
2297 : 147 : xlog_outdesc(StringInfo buf, XLogReaderState *record)
2298 : : {
739 jdavis@postgresql.or 2299 : 147 : RmgrData rmgr = GetRmgr(XLogRecGetRmid(record));
788 heikki.linnakangas@i 2300 : 147 : uint8 info = XLogRecGetInfo(record);
2301 : : const char *id;
2302 : :
739 jdavis@postgresql.or 2303 : 147 : appendStringInfoString(buf, rmgr.rm_name);
788 heikki.linnakangas@i 2304 : 147 : appendStringInfoChar(buf, '/');
2305 : :
739 jdavis@postgresql.or 2306 : 147 : id = rmgr.rm_identify(info);
788 heikki.linnakangas@i 2307 [ - + ]: 147 : if (id == NULL)
788 heikki.linnakangas@i 2308 :UBC 0 : appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
2309 : : else
788 heikki.linnakangas@i 2310 :CBC 147 : appendStringInfo(buf, "%s: ", id);
2311 : :
739 jdavis@postgresql.or 2312 : 147 : rmgr.rm_desc(buf, record);
788 heikki.linnakangas@i 2313 : 147 : }
2314 : :
#ifdef WAL_DEBUG

/*
 * Append the record's previous-record pointer, transaction ID and main data
 * length to 'buf', followed by its block references (see xlog_block_info).
 *
 * Only compiled when WAL_DEBUG is defined.
 */
static void
xlog_outrec(StringInfo buf, XLogReaderState *record)
{
	XLogRecPtr	prev_lsn = XLogRecGetPrev(record);

	appendStringInfo(buf, "prev %X/%X; xid %u",
					 LSN_FORMAT_ARGS(prev_lsn),
					 XLogRecGetXid(record));

	appendStringInfo(buf, "; len %u",
					 XLogRecGetDataLen(record));

	xlog_block_info(buf, record);
}
#endif							/* WAL_DEBUG */
2330 : :
2331 : : /*
2332 : : * Returns a string giving information about all the blocks in an
2333 : : * XLogRecord.
2334 : : */
2335 : : static void
2336 : 147 : xlog_block_info(StringInfo buf, XLogReaderState *record)
2337 : : {
2338 : : int block_id;
2339 : :
2340 : : /* decode block references */
758 tmunro@postgresql.or 2341 [ + + ]: 205 : for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
2342 : : {
2343 : : RelFileLocator rlocator;
2344 : : ForkNumber forknum;
2345 : : BlockNumber blk;
2346 : :
734 tgl@sss.pgh.pa.us 2347 [ - + ]: 58 : if (!XLogRecGetBlockTagExtended(record, block_id,
2348 : : &rlocator, &forknum, &blk, NULL))
788 heikki.linnakangas@i 2349 :UBC 0 : continue;
2350 : :
788 heikki.linnakangas@i 2351 [ + + ]:CBC 58 : if (forknum != MAIN_FORKNUM)
564 rhaas@postgresql.org 2352 : 3 : appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u",
2353 : : block_id,
2354 : : rlocator.spcOid, rlocator.dbOid,
2355 : : rlocator.relNumber,
2356 : : forknum,
2357 : : blk);
2358 : : else
2359 : 55 : appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u",
2360 : : block_id,
2361 : : rlocator.spcOid, rlocator.dbOid,
2362 : : rlocator.relNumber,
2363 : : blk);
788 heikki.linnakangas@i 2364 [ + + ]: 58 : if (XLogRecHasBlockImage(record, block_id))
2365 : 38 : appendStringInfoString(buf, " FPW");
2366 : : }
2367 : 147 : }
2368 : :
2369 : :
2370 : : /*
2371 : : * Check that it's OK to switch to new timeline during recovery.
2372 : : *
2373 : : * 'lsn' is the address of the shutdown checkpoint record we're about to
2374 : : * replay. (Currently, timeline can only change at a shutdown checkpoint).
2375 : : */
2376 : : static void
2377 : 26 : checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI,
2378 : : TimeLineID replayTLI)
2379 : : {
2380 : : /* Check that the record agrees on what the current (old) timeline is */
2381 [ - + ]: 26 : if (prevTLI != replayTLI)
788 heikki.linnakangas@i 2382 [ # # ]:UBC 0 : ereport(PANIC,
2383 : : (errmsg("unexpected previous timeline ID %u (current timeline ID %u) in checkpoint record",
2384 : : prevTLI, replayTLI)));
2385 : :
2386 : : /*
2387 : : * The new timeline better be in the list of timelines we expect to see,
2388 : : * according to the timeline history. It should also not decrease.
2389 : : */
788 heikki.linnakangas@i 2390 [ + - - + ]:CBC 26 : if (newTLI < replayTLI || !tliInHistory(newTLI, expectedTLEs))
788 heikki.linnakangas@i 2391 [ # # ]:UBC 0 : ereport(PANIC,
2392 : : (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
2393 : : newTLI, replayTLI)));
2394 : :
2395 : : /*
2396 : : * If we have not yet reached min recovery point, and we're about to
2397 : : * switch to a timeline greater than the timeline of the min recovery
2398 : : * point: trouble. After switching to the new timeline, we could not
2399 : : * possibly visit the min recovery point on the correct timeline anymore.
2400 : : * This can happen if there is a newer timeline in the archive that
2401 : : * branched before the timeline the min recovery point is on, and you
2402 : : * attempt to do PITR to the new timeline.
2403 : : */
788 heikki.linnakangas@i 2404 [ + + ]:CBC 26 : if (!XLogRecPtrIsInvalid(minRecoveryPoint) &&
2405 [ + + ]: 24 : lsn < minRecoveryPoint &&
2406 [ - + ]: 10 : newTLI > minRecoveryPointTLI)
788 heikki.linnakangas@i 2407 [ # # ]:UBC 0 : ereport(PANIC,
2408 : : (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
2409 : : newTLI,
2410 : : LSN_FORMAT_ARGS(minRecoveryPoint),
2411 : : minRecoveryPointTLI)));
2412 : :
2413 : : /* Looks good */
788 heikki.linnakangas@i 2414 :CBC 26 : }
2415 : :
2416 : :
2417 : : /*
2418 : : * Extract timestamp from WAL record.
2419 : : *
2420 : : * If the record contains a timestamp, returns true, and saves the timestamp
2421 : : * in *recordXtime. If the record type has no timestamp, returns false.
2422 : : * Currently, only transaction commit/abort records and restore points contain
2423 : : * timestamps.
2424 : : */
2425 : : static bool
2426 : 40451 : getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
2427 : : {
2428 : 40451 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2429 : 40451 : uint8 xact_info = info & XLOG_XACT_OPMASK;
2430 : 40451 : uint8 rmid = XLogRecGetRmid(record);
2431 : :
2432 [ + + + - ]: 40451 : if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
2433 : : {
2434 : 3 : *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
2435 : 3 : return true;
2436 : : }
2437 [ + - + + : 40448 : if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_COMMIT ||
+ + ]
2438 : : xact_info == XLOG_XACT_COMMIT_PREPARED))
2439 : : {
2440 : 37218 : *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
2441 : 37218 : return true;
2442 : : }
2443 [ + - + + : 3230 : if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_ABORT ||
+ - ]
2444 : : xact_info == XLOG_XACT_ABORT_PREPARED))
2445 : : {
2446 : 3230 : *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
2447 : 3230 : return true;
2448 : : }
788 heikki.linnakangas@i 2449 :UBC 0 : return false;
2450 : : }
2451 : :
2452 : : /*
2453 : : * Checks whether the current buffer page and backup page stored in the
2454 : : * WAL record are consistent or not. Before comparing the two pages, a
2455 : : * masking can be applied to the pages to ignore certain areas like hint bits,
2456 : : * unused space between pd_lower and pd_upper among other things. This
2457 : : * function should be called once WAL replay has been completed for a
2458 : : * given record.
2459 : : */
2460 : : static void
788 heikki.linnakangas@i 2461 :CBC 3225 : verifyBackupPageConsistency(XLogReaderState *record)
2462 : : {
739 jdavis@postgresql.or 2463 : 3225 : RmgrData rmgr = GetRmgr(XLogRecGetRmid(record));
2464 : : RelFileLocator rlocator;
2465 : : ForkNumber forknum;
2466 : : BlockNumber blkno;
2467 : : int block_id;
2468 : :
2469 : : /* Records with no backup blocks have no need for consistency checks. */
788 heikki.linnakangas@i 2470 [ - + ]: 3225 : if (!XLogRecHasAnyBlockRefs(record))
788 heikki.linnakangas@i 2471 :UBC 0 : return;
2472 : :
788 heikki.linnakangas@i 2473 [ - + ]:CBC 3225 : Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
2474 : :
758 tmunro@postgresql.or 2475 [ + + ]: 8445 : for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
2476 : : {
2477 : : Buffer buf;
2478 : : Page page;
2479 : :
734 tgl@sss.pgh.pa.us 2480 [ - + ]: 5220 : if (!XLogRecGetBlockTagExtended(record, block_id,
2481 : : &rlocator, &forknum, &blkno, NULL))
2482 : : {
2483 : : /*
2484 : : * WAL record doesn't contain a block reference with the given id.
2485 : : * Do nothing.
2486 : : */
788 heikki.linnakangas@i 2487 :UBC 0 : continue;
2488 : : }
2489 : :
788 heikki.linnakangas@i 2490 [ - + ]:CBC 5220 : Assert(XLogRecHasBlockImage(record, block_id));
2491 : :
2492 [ - + ]: 5220 : if (XLogRecBlockImageApply(record, block_id))
2493 : : {
2494 : : /*
2495 : : * WAL record has already applied the page, so bypass the
2496 : : * consistency check as that would result in comparing the full
2497 : : * page stored in the record with itself.
2498 : : */
788 heikki.linnakangas@i 2499 :UBC 0 : continue;
2500 : : }
2501 : :
2502 : : /*
2503 : : * Read the contents from the current buffer and store it in a
2504 : : * temporary page.
2505 : : */
648 rhaas@postgresql.org 2506 :CBC 5220 : buf = XLogReadBufferExtended(rlocator, forknum, blkno,
2507 : : RBM_NORMAL_NO_LOG,
2508 : : InvalidBuffer);
788 heikki.linnakangas@i 2509 [ - + ]: 5220 : if (!BufferIsValid(buf))
788 heikki.linnakangas@i 2510 :UBC 0 : continue;
2511 : :
788 heikki.linnakangas@i 2512 :CBC 5220 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
2513 : 5220 : page = BufferGetPage(buf);
2514 : :
2515 : : /*
2516 : : * Take a copy of the local page where WAL has been applied to have a
2517 : : * comparison base before masking it...
2518 : : */
2519 : 5220 : memcpy(replay_image_masked, page, BLCKSZ);
2520 : :
2521 : : /* No need for this page anymore now that a copy is in. */
2522 : 5220 : UnlockReleaseBuffer(buf);
2523 : :
2524 : : /*
2525 : : * If the block LSN is already ahead of this WAL record, we can't
2526 : : * expect contents to match. This can happen if recovery is
2527 : : * restarted.
2528 : : */
2529 [ - + ]: 5220 : if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
788 heikki.linnakangas@i 2530 :UBC 0 : continue;
2531 : :
2532 : : /*
2533 : : * Read the contents from the backup copy, stored in WAL record and
2534 : : * store it in a temporary page. There is no need to allocate a new
2535 : : * page here, a local buffer is fine to hold its contents and a mask
2536 : : * can be directly applied on it.
2537 : : */
788 heikki.linnakangas@i 2538 [ - + ]:CBC 5220 : if (!RestoreBlockImage(record, block_id, primary_image_masked))
583 michael@paquier.xyz 2539 [ # # ]:UBC 0 : ereport(ERROR,
2540 : : (errcode(ERRCODE_INTERNAL_ERROR),
2541 : : errmsg_internal("%s", record->errormsg_buf)));
2542 : :
2543 : : /*
2544 : : * If masking function is defined, mask both the primary and replay
2545 : : * images
2546 : : */
739 jdavis@postgresql.or 2547 [ + - ]:CBC 5220 : if (rmgr.rm_mask != NULL)
2548 : : {
2549 : 5220 : rmgr.rm_mask(replay_image_masked, blkno);
2550 : 5220 : rmgr.rm_mask(primary_image_masked, blkno);
2551 : : }
2552 : :
2553 : : /* Time to compare the primary and replay images. */
788 heikki.linnakangas@i 2554 [ - + ]: 5220 : if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
2555 : : {
788 heikki.linnakangas@i 2556 [ # # ]:UBC 0 : elog(FATAL,
2557 : : "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
2558 : : rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
2559 : : forknum, blkno);
2560 : : }
2561 : : }
2562 : : }
2563 : :
2564 : : /*
2565 : : * For point-in-time recovery, this function decides whether we want to
2566 : : * stop applying the XLOG before the current record.
2567 : : *
2568 : : * Returns true if we are stopping, false otherwise. If stopping, some
2569 : : * information is saved in recoveryStopXid et al for use in annotating the
2570 : : * new timeline's history file.
2571 : : */
2572 : : static bool
788 heikki.linnakangas@i 2573 :CBC 2982028 : recoveryStopsBefore(XLogReaderState *record)
2574 : : {
2575 : 2982028 : bool stopsHere = false;
2576 : : uint8 xact_info;
2577 : : bool isCommit;
2578 : 2982028 : TimestampTz recordXtime = 0;
2579 : : TransactionId recordXid;
2580 : :
2581 : : /*
2582 : : * Ignore recovery target settings when not in archive recovery (meaning
2583 : : * we are in crash recovery).
2584 : : */
2585 [ + + ]: 2982028 : if (!ArchiveRecoveryRequested)
2586 : 247201 : return false;
2587 : :
2588 : : /* Check if we should stop as soon as reaching consistency */
2589 [ + + + + ]: 2734827 : if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
2590 : : {
2591 [ + - ]: 1 : ereport(LOG,
2592 : : (errmsg("recovery stopping after reaching consistency")));
2593 : :
2594 : 1 : recoveryStopAfter = false;
2595 : 1 : recoveryStopXid = InvalidTransactionId;
2596 : 1 : recoveryStopLSN = InvalidXLogRecPtr;
2597 : 1 : recoveryStopTime = 0;
2598 : 1 : recoveryStopName[0] = '\0';
2599 : 1 : return true;
2600 : : }
2601 : :
2602 : : /* Check if target LSN has been reached */
2603 [ + + ]: 2734826 : if (recoveryTarget == RECOVERY_TARGET_LSN &&
2604 [ + + ]: 10963 : !recoveryTargetInclusive &&
2605 [ + + ]: 333 : record->ReadRecPtr >= recoveryTargetLSN)
2606 : : {
2607 : 1 : recoveryStopAfter = false;
2608 : 1 : recoveryStopXid = InvalidTransactionId;
2609 : 1 : recoveryStopLSN = record->ReadRecPtr;
2610 : 1 : recoveryStopTime = 0;
2611 : 1 : recoveryStopName[0] = '\0';
2612 [ + - ]: 1 : ereport(LOG,
2613 : : (errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
2614 : : LSN_FORMAT_ARGS(recoveryStopLSN))));
2615 : 1 : return true;
2616 : : }
2617 : :
2618 : : /* Otherwise we only consider stopping before COMMIT or ABORT records. */
2619 [ + + ]: 2734825 : if (XLogRecGetRmid(record) != RM_XACT_ID)
2620 : 2714370 : return false;
2621 : :
2622 : 20455 : xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
2623 : :
2624 [ + + ]: 20455 : if (xact_info == XLOG_XACT_COMMIT)
2625 : : {
2626 : 18585 : isCommit = true;
2627 : 18585 : recordXid = XLogRecGetXid(record);
2628 : : }
2629 [ + + ]: 1870 : else if (xact_info == XLOG_XACT_COMMIT_PREPARED)
2630 : : {
2631 : 24 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
2632 : : xl_xact_parsed_commit parsed;
2633 : :
2634 : 24 : isCommit = true;
2635 : 24 : ParseCommitRecord(XLogRecGetInfo(record),
2636 : : xlrec,
2637 : : &parsed);
2638 : 24 : recordXid = parsed.twophase_xid;
2639 : : }
2640 [ + + ]: 1846 : else if (xact_info == XLOG_XACT_ABORT)
2641 : : {
2642 : 1603 : isCommit = false;
2643 : 1603 : recordXid = XLogRecGetXid(record);
2644 : : }
2645 [ + + ]: 243 : else if (xact_info == XLOG_XACT_ABORT_PREPARED)
2646 : : {
2647 : 12 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
2648 : : xl_xact_parsed_abort parsed;
2649 : :
2650 : 12 : isCommit = false;
2651 : 12 : ParseAbortRecord(XLogRecGetInfo(record),
2652 : : xlrec,
2653 : : &parsed);
2654 : 12 : recordXid = parsed.twophase_xid;
2655 : : }
2656 : : else
2657 : 231 : return false;
2658 : :
2659 [ + + - + ]: 20224 : if (recoveryTarget == RECOVERY_TARGET_XID && !recoveryTargetInclusive)
2660 : : {
2661 : : /*
2662 : : * There can be only one transaction end record with this exact
2663 : : * transactionid
2664 : : *
2665 : : * when testing for an xid, we MUST test for equality only, since
2666 : : * transactions are numbered in the order they start, not the order
2667 : : * they complete. A higher numbered xid will complete before you about
2668 : : * 50% of the time...
2669 : : */
788 heikki.linnakangas@i 2670 :UBC 0 : stopsHere = (recordXid == recoveryTargetXid);
2671 : : }
2672 : :
2673 : : /*
2674 : : * Note: we must fetch recordXtime regardless of recoveryTarget setting.
2675 : : * We don't expect getRecordTimestamp ever to fail, since we already know
2676 : : * this is a commit or abort record; but test its result anyway.
2677 : : */
451 tgl@sss.pgh.pa.us 2678 [ + - ]:CBC 20224 : if (getRecordTimestamp(record, &recordXtime) &&
2679 [ + + ]: 20224 : recoveryTarget == RECOVERY_TARGET_TIME)
2680 : : {
2681 : : /*
2682 : : * There can be many transactions that share the same commit time, so
2683 : : * we stop after the last one, if we are inclusive, or stop at the
2684 : : * first one if we are exclusive
2685 : : */
788 heikki.linnakangas@i 2686 [ + - ]: 8 : if (recoveryTargetInclusive)
2687 : 8 : stopsHere = (recordXtime > recoveryTargetTime);
2688 : : else
788 heikki.linnakangas@i 2689 :UBC 0 : stopsHere = (recordXtime >= recoveryTargetTime);
2690 : : }
2691 : :
788 heikki.linnakangas@i 2692 [ + + ]:CBC 20224 : if (stopsHere)
2693 : : {
2694 : 2 : recoveryStopAfter = false;
2695 : 2 : recoveryStopXid = recordXid;
2696 : 2 : recoveryStopTime = recordXtime;
2697 : 2 : recoveryStopLSN = InvalidXLogRecPtr;
2698 : 2 : recoveryStopName[0] = '\0';
2699 : :
2700 [ + - ]: 2 : if (isCommit)
2701 : : {
2702 [ + - ]: 2 : ereport(LOG,
2703 : : (errmsg("recovery stopping before commit of transaction %u, time %s",
2704 : : recoveryStopXid,
2705 : : timestamptz_to_str(recoveryStopTime))));
2706 : : }
2707 : : else
2708 : : {
788 heikki.linnakangas@i 2709 [ # # ]:UBC 0 : ereport(LOG,
2710 : : (errmsg("recovery stopping before abort of transaction %u, time %s",
2711 : : recoveryStopXid,
2712 : : timestamptz_to_str(recoveryStopTime))));
2713 : : }
2714 : : }
2715 : :
788 heikki.linnakangas@i 2716 :CBC 20224 : return stopsHere;
2717 : : }
2718 : :
2719 : : /*
2720 : : * Same as recoveryStopsBefore, but called after applying the record.
2721 : : *
2722 : : * We also track the timestamp of the latest applied COMMIT/ABORT
2723 : : * record in XLogRecoveryCtl->recoveryLastXTime.
2724 : : */
2725 : : static bool
2726 : 2982020 : recoveryStopsAfter(XLogReaderState *record)
2727 : : {
2728 : : uint8 info;
2729 : : uint8 xact_info;
2730 : : uint8 rmid;
313 2731 : 2982020 : TimestampTz recordXtime = 0;
2732 : :
2733 : : /*
2734 : : * Ignore recovery target settings when not in archive recovery (meaning
2735 : : * we are in crash recovery).
2736 : : */
788 2737 [ + + ]: 2982020 : if (!ArchiveRecoveryRequested)
2738 : 247201 : return false;
2739 : :
2740 : 2734819 : info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2741 : 2734819 : rmid = XLogRecGetRmid(record);
2742 : :
2743 : : /*
2744 : : * There can be many restore points that share the same name; we stop at
2745 : : * the first one.
2746 : : */
2747 [ + + + + ]: 2734819 : if (recoveryTarget == RECOVERY_TARGET_NAME &&
2748 [ + + ]: 29 : rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
2749 : : {
2750 : : xl_restore_point *recordRestorePointData;
2751 : :
2752 : 4 : recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
2753 : :
2754 [ + + ]: 4 : if (strcmp(recordRestorePointData->rp_name, recoveryTargetName) == 0)
2755 : : {
2756 : 3 : recoveryStopAfter = true;
2757 : 3 : recoveryStopXid = InvalidTransactionId;
2758 : 3 : recoveryStopLSN = InvalidXLogRecPtr;
2759 : 3 : (void) getRecordTimestamp(record, &recoveryStopTime);
2760 : 3 : strlcpy(recoveryStopName, recordRestorePointData->rp_name, MAXFNAMELEN);
2761 : :
2762 [ + - ]: 3 : ereport(LOG,
2763 : : (errmsg("recovery stopping at restore point \"%s\", time %s",
2764 : : recoveryStopName,
2765 : : timestamptz_to_str(recoveryStopTime))));
2766 : 3 : return true;
2767 : : }
2768 : : }
2769 : :
2770 : : /* Check if the target LSN has been reached */
2771 [ + + + + ]: 2734816 : if (recoveryTarget == RECOVERY_TARGET_LSN &&
2772 : 10630 : recoveryTargetInclusive &&
2773 [ + + ]: 10630 : record->ReadRecPtr >= recoveryTargetLSN)
2774 : : {
2775 : 4 : recoveryStopAfter = true;
2776 : 4 : recoveryStopXid = InvalidTransactionId;
2777 : 4 : recoveryStopLSN = record->ReadRecPtr;
2778 : 4 : recoveryStopTime = 0;
2779 : 4 : recoveryStopName[0] = '\0';
2780 [ + - ]: 4 : ereport(LOG,
2781 : : (errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
2782 : : LSN_FORMAT_ARGS(recoveryStopLSN))));
2783 : 4 : return true;
2784 : : }
2785 : :
2786 [ + + ]: 2734812 : if (rmid != RM_XACT_ID)
2787 : 2714361 : return false;
2788 : :
2789 : 20451 : xact_info = info & XLOG_XACT_OPMASK;
2790 : :
2791 [ + + + + ]: 20451 : if (xact_info == XLOG_XACT_COMMIT ||
2792 [ + + ]: 1846 : xact_info == XLOG_XACT_COMMIT_PREPARED ||
2793 [ + + ]: 243 : xact_info == XLOG_XACT_ABORT ||
2794 : : xact_info == XLOG_XACT_ABORT_PREPARED)
2795 : : {
2796 : : TransactionId recordXid;
2797 : :
2798 : : /* Update the last applied transaction timestamp */
2799 [ + - ]: 20220 : if (getRecordTimestamp(record, &recordXtime))
2800 : 20220 : SetLatestXTime(recordXtime);
2801 : :
2802 : : /* Extract the XID of the committed/aborted transaction */
2803 [ + + ]: 20220 : if (xact_info == XLOG_XACT_COMMIT_PREPARED)
2804 : : {
2805 : 24 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
2806 : : xl_xact_parsed_commit parsed;
2807 : :
2808 : 24 : ParseCommitRecord(XLogRecGetInfo(record),
2809 : : xlrec,
2810 : : &parsed);
2811 : 24 : recordXid = parsed.twophase_xid;
2812 : : }
2813 [ + + ]: 20196 : else if (xact_info == XLOG_XACT_ABORT_PREPARED)
2814 : : {
2815 : 12 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
2816 : : xl_xact_parsed_abort parsed;
2817 : :
2818 : 12 : ParseAbortRecord(XLogRecGetInfo(record),
2819 : : xlrec,
2820 : : &parsed);
2821 : 12 : recordXid = parsed.twophase_xid;
2822 : : }
2823 : : else
2824 : 20184 : recordXid = XLogRecGetXid(record);
2825 : :
2826 : : /*
2827 : : * There can be only one transaction end record with this exact
2828 : : * transactionid
2829 : : *
2830 : : * when testing for an xid, we MUST test for equality only, since
2831 : : * transactions are numbered in the order they start, not the order
2832 : : * they complete. A higher numbered xid will complete before you about
2833 : : * 50% of the time...
2834 : : */
2835 [ + + + - ]: 20220 : if (recoveryTarget == RECOVERY_TARGET_XID && recoveryTargetInclusive &&
2836 [ + + ]: 2 : recordXid == recoveryTargetXid)
2837 : : {
2838 : 1 : recoveryStopAfter = true;
2839 : 1 : recoveryStopXid = recordXid;
2840 : 1 : recoveryStopTime = recordXtime;
2841 : 1 : recoveryStopLSN = InvalidXLogRecPtr;
2842 : 1 : recoveryStopName[0] = '\0';
2843 : :
2844 [ - + - - ]: 1 : if (xact_info == XLOG_XACT_COMMIT ||
2845 : : xact_info == XLOG_XACT_COMMIT_PREPARED)
2846 : : {
2847 [ + - ]: 1 : ereport(LOG,
2848 : : (errmsg("recovery stopping after commit of transaction %u, time %s",
2849 : : recoveryStopXid,
2850 : : timestamptz_to_str(recoveryStopTime))));
2851 : : }
788 heikki.linnakangas@i 2852 [ # # # # ]:UBC 0 : else if (xact_info == XLOG_XACT_ABORT ||
2853 : : xact_info == XLOG_XACT_ABORT_PREPARED)
2854 : : {
2855 [ # # ]: 0 : ereport(LOG,
2856 : : (errmsg("recovery stopping after abort of transaction %u, time %s",
2857 : : recoveryStopXid,
2858 : : timestamptz_to_str(recoveryStopTime))));
2859 : : }
788 heikki.linnakangas@i 2860 :CBC 1 : return true;
2861 : : }
2862 : : }
2863 : :
2864 : : /* Check if we should stop as soon as reaching consistency */
2865 [ - + - - ]: 20450 : if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
2866 : : {
788 heikki.linnakangas@i 2867 [ # # ]:UBC 0 : ereport(LOG,
2868 : : (errmsg("recovery stopping after reaching consistency")));
2869 : :
2870 : 0 : recoveryStopAfter = true;
2871 : 0 : recoveryStopXid = InvalidTransactionId;
2872 : 0 : recoveryStopTime = 0;
2873 : 0 : recoveryStopLSN = InvalidXLogRecPtr;
2874 : 0 : recoveryStopName[0] = '\0';
2875 : 0 : return true;
2876 : : }
2877 : :
788 heikki.linnakangas@i 2878 :CBC 20450 : return false;
2879 : : }
2880 : :
2881 : : /*
2882 : : * Create a comment for the history file to explain why and where
2883 : : * timeline changed.
2884 : : */
2885 : : static char *
2886 : 729 : getRecoveryStopReason(void)
2887 : : {
2888 : : char reason[200];
2889 : :
2890 [ - + ]: 729 : if (recoveryTarget == RECOVERY_TARGET_XID)
788 heikki.linnakangas@i 2891 :UBC 0 : snprintf(reason, sizeof(reason),
2892 : : "%s transaction %u",
2893 [ # # ]: 0 : recoveryStopAfter ? "after" : "before",
2894 : : recoveryStopXid);
788 heikki.linnakangas@i 2895 [ - + ]:CBC 729 : else if (recoveryTarget == RECOVERY_TARGET_TIME)
788 heikki.linnakangas@i 2896 :UBC 0 : snprintf(reason, sizeof(reason),
2897 : : "%s %s\n",
2898 [ # # ]: 0 : recoveryStopAfter ? "after" : "before",
2899 : : timestamptz_to_str(recoveryStopTime));
788 heikki.linnakangas@i 2900 [ + + ]:CBC 729 : else if (recoveryTarget == RECOVERY_TARGET_LSN)
788 heikki.linnakangas@i 2901 :GBC 5 : snprintf(reason, sizeof(reason),
2902 : : "%s LSN %X/%X\n",
788 heikki.linnakangas@i 2903 :CBC 5 : recoveryStopAfter ? "after" : "before",
2904 [ + + ]: 5 : LSN_FORMAT_ARGS(recoveryStopLSN));
2905 [ + + ]: 724 : else if (recoveryTarget == RECOVERY_TARGET_NAME)
2906 : 3 : snprintf(reason, sizeof(reason),
2907 : : "at restore point \"%s\"",
2908 : : recoveryStopName);
2909 [ - + ]: 721 : else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
788 heikki.linnakangas@i 2910 :UBC 0 : snprintf(reason, sizeof(reason), "reached consistency");
2911 : : else
788 heikki.linnakangas@i 2912 :CBC 721 : snprintf(reason, sizeof(reason), "no recovery target specified");
2913 : :
2914 : 729 : return pstrdup(reason);
2915 : : }
2916 : :
2917 : : /*
2918 : : * Wait until shared recoveryPauseState is set to RECOVERY_NOT_PAUSED.
2919 : : *
2920 : : * endOfRecovery is true if the recovery target is reached and
2921 : : * the paused state starts at the end of recovery because of
2922 : : * recovery_target_action=pause, and false otherwise.
2923 : : */
2924 : : static void
2925 : 8 : recoveryPausesHere(bool endOfRecovery)
2926 : : {
2927 : : /* Don't pause unless users can connect! */
2928 [ - + ]: 8 : if (!LocalHotStandbyActive)
788 heikki.linnakangas@i 2929 :UBC 0 : return;
2930 : :
2931 : : /* Don't pause after standby promotion has been triggered */
788 heikki.linnakangas@i 2932 [ - + ]:CBC 8 : if (LocalPromoteIsTriggered)
788 heikki.linnakangas@i 2933 :UBC 0 : return;
2934 : :
788 heikki.linnakangas@i 2935 [ + + ]:CBC 8 : if (endOfRecovery)
2936 [ + - ]: 6 : ereport(LOG,
2937 : : (errmsg("pausing at the end of recovery"),
2938 : : errhint("Execute pg_wal_replay_resume() to promote.")));
2939 : : else
2940 [ + - ]: 2 : ereport(LOG,
2941 : : (errmsg("recovery has paused"),
2942 : : errhint("Execute pg_wal_replay_resume() to continue.")));
2943 : :
2944 : : /* loop until recoveryPauseState is set to RECOVERY_NOT_PAUSED */
2945 [ + + ]: 18 : while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
2946 : : {
2947 : 17 : HandleStartupProcInterrupts();
2948 [ + + ]: 17 : if (CheckForStandbyTrigger())
2949 : 1 : return;
2950 : :
2951 : : /*
2952 : : * If recovery pause is requested then set it paused. While we are in
2953 : : * the loop, user might resume and pause again so set this every time.
2954 : : */
2955 : 16 : ConfirmRecoveryPaused();
2956 : :
2957 : : /*
2958 : : * We wait on a condition variable that will wake us as soon as the
2959 : : * pause ends, but we use a timeout so we can check the above exit
2960 : : * condition periodically too.
2961 : : */
2962 : 16 : ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
2963 : : WAIT_EVENT_RECOVERY_PAUSE);
2964 : : }
2965 : 1 : ConditionVariableCancelSleep();
2966 : : }
2967 : :
2968 : : /*
2969 : : * When recovery_min_apply_delay is set, we wait long enough to make sure
2970 : : * certain record types are applied at least that interval behind the primary.
2971 : : *
2972 : : * Returns true if we waited.
2973 : : *
2974 : : * Note that the delay is calculated between the WAL record log time and
2975 : : * the current time on standby. We would prefer to keep track of when this
2976 : : * standby received each WAL record, which would allow a more consistent
2977 : : * approach and one not affected by time synchronisation issues, but that
2978 : : * is significantly more effort and complexity for little actual gain in
2979 : : * usability.
2980 : : */
2981 : : static bool
2982 : 2982024 : recoveryApplyDelay(XLogReaderState *record)
2983 : : {
2984 : : uint8 xact_info;
2985 : : TimestampTz xtime;
2986 : : TimestampTz delayUntil;
2987 : : long msecs;
2988 : :
2989 : : /* nothing to do if no delay configured */
2990 [ + + ]: 2982024 : if (recovery_min_apply_delay <= 0)
2991 : 2981973 : return false;
2992 : :
2993 : : /* no delay is applied on a database not yet consistent */
2994 [ + + ]: 51 : if (!reachedConsistency)
2995 : 4 : return false;
2996 : :
2997 : : /* nothing to do if crash recovery is requested */
2998 [ - + ]: 47 : if (!ArchiveRecoveryRequested)
788 heikki.linnakangas@i 2999 :UBC 0 : return false;
3000 : :
3001 : : /*
3002 : : * Is it a COMMIT record?
3003 : : *
3004 : : * We deliberately choose not to delay aborts since they have no effect on
3005 : : * MVCC. We already allow replay of records that don't have a timestamp,
3006 : : * so there is already opportunity for issues caused by early conflicts on
3007 : : * standbys.
3008 : : */
788 heikki.linnakangas@i 3009 [ + + ]:CBC 47 : if (XLogRecGetRmid(record) != RM_XACT_ID)
3010 : 43 : return false;
3011 : :
3012 : 4 : xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
3013 : :
3014 [ - + - - ]: 4 : if (xact_info != XLOG_XACT_COMMIT &&
3015 : : xact_info != XLOG_XACT_COMMIT_PREPARED)
788 heikki.linnakangas@i 3016 :UBC 0 : return false;
3017 : :
788 heikki.linnakangas@i 3018 [ - + ]:CBC 4 : if (!getRecordTimestamp(record, &xtime))
788 heikki.linnakangas@i 3019 :UBC 0 : return false;
3020 : :
788 heikki.linnakangas@i 3021 :CBC 4 : delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
3022 : :
3023 : : /*
3024 : : * Exit without arming the latch if it's already past time to apply this
3025 : : * record
3026 : : */
3027 : 4 : msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(), delayUntil);
3028 [ + - ]: 4 : if (msecs <= 0)
788 heikki.linnakangas@i 3029 :UBC 0 : return false;
3030 : :
3031 : : while (true)
3032 : : {
788 heikki.linnakangas@i 3033 :CBC 11 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
3034 : :
3035 : : /* This might change recovery_min_apply_delay. */
3036 : 11 : HandleStartupProcInterrupts();
3037 : :
3038 [ - + ]: 11 : if (CheckForStandbyTrigger())
788 heikki.linnakangas@i 3039 :UBC 0 : break;
3040 : :
3041 : : /*
3042 : : * Recalculate delayUntil as recovery_min_apply_delay could have
3043 : : * changed while waiting in this loop.
3044 : : */
788 heikki.linnakangas@i 3045 :CBC 11 : delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
3046 : :
3047 : : /*
3048 : : * Wait for difference between GetCurrentTimestamp() and delayUntil.
3049 : : */
3050 : 11 : msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
3051 : : delayUntil);
3052 : :
3053 [ + + ]: 11 : if (msecs <= 0)
3054 : 4 : break;
3055 : :
3056 [ - + ]: 7 : elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
3057 : :
3058 : 7 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
3059 : : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
3060 : : msecs,
3061 : : WAIT_EVENT_RECOVERY_APPLY_DELAY);
3062 : : }
3063 : 4 : return true;
3064 : : }
3065 : :
3066 : : /*
3067 : : * Get the current state of the recovery pause request.
3068 : : */
3069 : : RecoveryPauseState
3070 : 22 : GetRecoveryPauseState(void)
3071 : : {
3072 : : RecoveryPauseState state;
3073 : :
3074 [ - + ]: 22 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
3075 : 22 : state = XLogRecoveryCtl->recoveryPauseState;
3076 : 22 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3077 : :
3078 : 22 : return state;
3079 : : }
3080 : :
3081 : : /*
3082 : : * Set the recovery pause state.
3083 : : *
3084 : : * If recovery pause is requested then sets the recovery pause state to
3085 : : * 'pause requested' if it is not already 'paused'. Otherwise, sets it
3086 : : * to 'not paused' to resume the recovery. The recovery pause will be
3087 : : * confirmed by the ConfirmRecoveryPaused.
3088 : : */
3089 : : void
3090 : 48 : SetRecoveryPause(bool recoveryPause)
3091 : : {
3092 [ - + ]: 48 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
3093 : :
3094 [ + + ]: 48 : if (!recoveryPause)
3095 : 40 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
3096 [ + - ]: 8 : else if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_NOT_PAUSED)
3097 : 8 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSE_REQUESTED;
3098 : :
3099 : 48 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3100 : :
3101 [ + + ]: 48 : if (!recoveryPause)
3102 : 40 : ConditionVariableBroadcast(&XLogRecoveryCtl->recoveryNotPausedCV);
3103 : 48 : }
3104 : :
3105 : : /*
3106 : : * Confirm the recovery pause by setting the recovery pause state to
3107 : : * RECOVERY_PAUSED.
3108 : : */
3109 : : static void
3110 : 16 : ConfirmRecoveryPaused(void)
3111 : : {
3112 : : /* If recovery pause is requested then set it paused */
3113 [ - + ]: 16 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
3114 [ + + ]: 16 : if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED)
3115 : 8 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSED;
3116 : 16 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3117 : 16 : }
3118 : :
3119 : :
3120 : : /*
3121 : : * Attempt to read the next XLOG record.
3122 : : *
3123 : : * Before first call, the reader needs to be positioned to the first record
3124 : : * by calling XLogPrefetcherBeginRead().
3125 : : *
3126 : : * If no valid record is available, returns NULL, or fails if emode is PANIC.
3127 : : * (emode must be either PANIC, LOG). In standby mode, retries until a valid
3128 : : * record is available.
3129 : : */
3130 : : static XLogRecord *
738 tmunro@postgresql.or 3131 : 2983901 : ReadRecord(XLogPrefetcher *xlogprefetcher, int emode,
3132 : : bool fetching_ckpt, TimeLineID replayTLI)
3133 : : {
3134 : : XLogRecord *record;
3135 : 2983901 : XLogReaderState *xlogreader = XLogPrefetcherGetReader(xlogprefetcher);
788 heikki.linnakangas@i 3136 : 2983901 : XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
3137 : :
3138 : : /* Pass through parameters to XLogPageRead */
3139 : 2983901 : private->fetching_ckpt = fetching_ckpt;
3140 : 2983901 : private->emode = emode;
3141 : 2983901 : private->randAccess = (xlogreader->ReadRecPtr == InvalidXLogRecPtr);
3142 : 2983901 : private->replayTLI = replayTLI;
3143 : :
3144 : : /* This is the first attempt to read this page. */
3145 : 2983901 : lastSourceFailed = false;
3146 : :
3147 : : for (;;)
3148 : 214 : {
3149 : : char *errormsg;
3150 : :
738 tmunro@postgresql.or 3151 : 2984115 : record = XLogPrefetcherReadRecord(xlogprefetcher, &errormsg);
788 heikki.linnakangas@i 3152 [ + + ]: 2984032 : if (record == NULL)
3153 : : {
3154 : : /*
3155 : : * When we find that WAL ends in an incomplete record, keep track
3156 : : * of that record. After recovery is done, we'll write a record
3157 : : * to indicate to downstream WAL readers that that portion is to
3158 : : * be ignored.
3159 : : *
3160 : : * However, when ArchiveRecoveryRequested = true, we're going to
3161 : : * switch to a new timeline at the end of recovery. We will only
3162 : : * copy WAL over to the new timeline up to the end of the last
3163 : : * complete record, so if we did this, we would later create an
3164 : : * overwrite contrecord in the wrong place, breaking everything.
3165 : : */
594 rhaas@postgresql.org 3166 [ + + ]: 357 : if (!ArchiveRecoveryRequested &&
788 heikki.linnakangas@i 3167 [ + + ]: 102 : !XLogRecPtrIsInvalid(xlogreader->abortedRecPtr))
3168 : : {
3169 : 10 : abortedRecPtr = xlogreader->abortedRecPtr;
3170 : 10 : missingContrecPtr = xlogreader->missingContrecPtr;
3171 : : }
3172 : :
3173 [ + + ]: 357 : if (readFile >= 0)
3174 : : {
3175 : 336 : close(readFile);
3176 : 336 : readFile = -1;
3177 : : }
3178 : :
3179 : : /*
3180 : : * We only end up here without a message when XLogPageRead()
3181 : : * failed - in that case we already logged something. In
3182 : : * StandbyMode that only happens if we have been triggered, so we
3183 : : * shouldn't loop anymore in that case.
3184 : : */
3185 [ + + ]: 357 : if (errormsg)
3186 [ + + ]: 336 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3187 : : (errmsg_internal("%s", errormsg) /* already translated */ ));
3188 : : }
3189 : :
3190 : : /*
3191 : : * Check page TLI is one of the expected values.
3192 : : */
3193 [ - + ]: 2983675 : else if (!tliInHistory(xlogreader->latestPageTLI, expectedTLEs))
3194 : : {
3195 : : char fname[MAXFNAMELEN];
3196 : : XLogSegNo segno;
3197 : : int32 offset;
3198 : :
788 heikki.linnakangas@i 3199 :UBC 0 : XLByteToSeg(xlogreader->latestPagePtr, segno, wal_segment_size);
3200 : 0 : offset = XLogSegmentOffset(xlogreader->latestPagePtr,
3201 : : wal_segment_size);
3202 : 0 : XLogFileName(fname, xlogreader->seg.ws_tli, segno,
3203 : : wal_segment_size);
3204 [ # # ]: 0 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3205 : : (errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%X, offset %u",
3206 : : xlogreader->latestPageTLI,
3207 : : fname,
3208 : : LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
3209 : : offset)));
3210 : 0 : record = NULL;
3211 : : }
3212 : :
788 heikki.linnakangas@i 3213 [ + + ]:CBC 2984032 : if (record)
3214 : : {
3215 : : /* Great, got a record */
3216 : 2983818 : return record;
3217 : : }
3218 : : else
3219 : : {
3220 : : /* No valid record available from this source */
3221 : 357 : lastSourceFailed = true;
3222 : :
3223 : : /*
3224 : : * If archive recovery was requested, but we were still doing
3225 : : * crash recovery, switch to archive recovery and retry using the
3226 : : * offline archive. We have now replayed all the valid WAL in
3227 : : * pg_wal, so we are presumably now consistent.
3228 : : *
3229 : : * We require that there's at least some valid WAL present in
3230 : : * pg_wal, however (!fetching_ckpt). We could recover using the
3231 : : * WAL from the archive, even if pg_wal is completely empty, but
3232 : : * we'd have no idea how far we'd have to replay to reach
3233 : : * consistency. So err on the safe side and give up.
3234 : : */
3235 [ + + + + ]: 357 : if (!InArchiveRecovery && ArchiveRecoveryRequested &&
3236 [ + - ]: 4 : !fetching_ckpt)
3237 : : {
3238 [ - + ]: 4 : ereport(DEBUG1,
3239 : : (errmsg_internal("reached end of WAL in pg_wal, entering archive recovery")));
3240 : 4 : InArchiveRecovery = true;
3241 [ + - ]: 4 : if (StandbyModeRequested)
433 rhaas@postgresql.org 3242 : 4 : EnableStandbyMode();
3243 : :
788 heikki.linnakangas@i 3244 : 4 : SwitchIntoArchiveRecovery(xlogreader->EndRecPtr, replayTLI);
3245 : 4 : minRecoveryPoint = xlogreader->EndRecPtr;
3246 : 4 : minRecoveryPointTLI = replayTLI;
3247 : :
3248 : 4 : CheckRecoveryConsistency();
3249 : :
3250 : : /*
3251 : : * Before we retry, reset lastSourceFailed and currentSource
3252 : : * so that we will check the archive next.
3253 : : */
3254 : 4 : lastSourceFailed = false;
3255 : 4 : currentSource = XLOG_FROM_ANY;
3256 : :
3257 : 214 : continue;
3258 : : }
3259 : :
3260 : : /* In standby mode, loop back to retry. Otherwise, give up. */
3261 [ + + + + ]: 353 : if (StandbyMode && !CheckForStandbyTrigger())
3262 : 210 : continue;
3263 : : else
3264 : 143 : return NULL;
3265 : : }
3266 : : }
3267 : : }
3268 : :
3269 : : /*
3270 : : * Read the XLOG page containing targetPagePtr into readBuf (if not read
3271 : : * already). Returns number of bytes read, if the page is read successfully,
3272 : : * or XLREAD_FAIL in case of errors. When errors occur, they are ereport'ed,
3273 : : * but only if they have not been previously reported.
3274 : : *
3275 : : * See XLogReaderRoutine.page_read for more details.
: : *
: : * Arguments:
: : *   xlogreader    - reader state; its private_data is used as an
: : *                   XLogPageReadPrivate (emode, randAccess, fetching_ckpt
: : *                   and replayTLI are taken from it).
: : *   targetPagePtr - WAL location of the page to read.
: : *   reqLen        - number of bytes of that page the caller needs; when
: : *                   streaming, targetPagePtr + reqLen is compared against
: : *                   flushedUpto to decide whether more WAL must arrive.
: : *   targetRecPtr  - handed to WaitForWALToBecomeAvailable() as the position
: : *                   of the record of interest.
: : *   readBuf       - destination buffer; XLOG_BLCKSZ bytes are read into it.
: : *
: : * On success the return value is readLen, which can be smaller than
: : * XLOG_BLCKSZ when the page is the one currently being streamed.
3276 : : *
3277 : : * While prefetching, xlogreader->nonblocking may be set. In that case,
3278 : : * returns XLREAD_WOULDBLOCK if we'd otherwise have to wait for more WAL.
3279 : : *
3280 : : * This is responsible for restoring files from archive as needed, as well
3281 : : * as for waiting for the requested WAL record to arrive in standby mode.
3282 : : *
3283 : : * xlogreader->private_data->emode specifies the log level used for reporting
3284 : : * "file not found" or "end of WAL" situations in archive recovery, or in
3285 : : * standby mode when promotion is triggered. If set to WARNING or below,
3286 : : * XLogPageRead() returns XLREAD_FAIL in those situations, on higher log
3287 : : * levels the ereport() won't return.
3288 : : *
3289 : : * In standby mode, if after a successful return of XLogPageRead() the
3290 : : * caller finds the record it's interested in to be broken, it should
3291 : : * ereport the error with the level determined by
3292 : : * emode_for_corrupt_record(), and then set lastSourceFailed
3293 : : * and call XLogPageRead() again with the same arguments. This lets
3294 : : * XLogPageRead() try fetching the record from another source, or sleep
3295 : : * and retry.
3296 : : */
3297 : : static int
3298 : 101721 : XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
3299 : : XLogRecPtr targetRecPtr, char *readBuf)
3300 : : {
3301 : 101721 : XLogPageReadPrivate *private =
3302 : : (XLogPageReadPrivate *) xlogreader->private_data;
3303 : 101721 : int emode = private->emode; /* log level stored by ReadRecord() */
3304 : : uint32 targetPageOff;
3305 : : XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY;
3306 : : int r;
3307 : :
3308 : 101721 : XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
3309 : 101721 : targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
3310 : :
3311 : : /*
3312 : : * See if we need to switch to a new segment because the requested record
3313 : : * is not in the currently open one.
3314 : : */
3315 [ + + ]: 101721 : if (readFile >= 0 &&
3316 [ + + ]: 100408 : !XLByteInSeg(targetPagePtr, readSegNo, wal_segment_size))
3317 : : {
3318 : : /*
3319 : : * Request a restartpoint if we've replayed too much xlog since the
3320 : : * last one.
3321 : : */
3322 [ + + + - ]: 189 : if (ArchiveRecoveryRequested && IsUnderPostmaster)
3323 : : {
3324 [ + + ]: 176 : if (XLogCheckpointNeeded(readSegNo))
3325 : : {
3326 : 35 : (void) GetRedoRecPtr(); /* NOTE(review): presumably refreshes the cached redo pointer before the re-check below - confirm */
3327 [ + + ]: 35 : if (XLogCheckpointNeeded(readSegNo))
3328 : 30 : RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
3329 : : }
3330 : : }
3331 : :
3332 : 189 : close(readFile);
3333 : 189 : readFile = -1;
3334 : 189 : readSource = XLOG_FROM_ANY;
3335 : : }
3336 : :
3337 : 101721 : XLByteToSeg(targetPagePtr, readSegNo, wal_segment_size);
3338 : :
3339 : 101727 : retry:
3340 : : /* See if we need to retrieve more data */
3341 [ + + ]: 101727 : if (readFile < 0 ||
3342 [ + + ]: 100219 : (readSource == XLOG_FROM_STREAM &&
3343 [ + + ]: 82597 : flushedUpto < targetPagePtr + reqLen))
3344 : : {
738 tmunro@postgresql.or 3345 [ + + ]: 27893 : if (readFile >= 0 &&
3346 [ + + ]: 26385 : xlogreader->nonblocking &&
3347 [ + - ]: 12976 : readSource == XLOG_FROM_STREAM &&
3348 [ + - ]: 12976 : flushedUpto < targetPagePtr + reqLen)
3349 : 12976 : return XLREAD_WOULDBLOCK;
3350 : :
3351 [ + + + - ]: 14834 : switch (WaitForWALToBecomeAvailable(targetPagePtr + reqLen,
3352 : 14917 : private->randAccess,
3353 : 14917 : private->fetching_ckpt,
3354 : : targetRecPtr,
3355 : : private->replayTLI,
3356 : : xlogreader->EndRecPtr,
3357 : 14917 : xlogreader->nonblocking))
3358 : : {
3359 : 117 : case XLREAD_WOULDBLOCK:
3360 : 117 : return XLREAD_WOULDBLOCK;
3361 : 38 : case XLREAD_FAIL:
3362 [ - + ]: 38 : if (readFile >= 0)
738 tmunro@postgresql.or 3363 :UBC 0 : close(readFile);
738 tmunro@postgresql.or 3364 :CBC 38 : readFile = -1;
3365 : 38 : readLen = 0;
3366 : 38 : readSource = XLOG_FROM_ANY;
3367 : 38 : return XLREAD_FAIL;
3368 : 14679 : case XLREAD_SUCCESS:
3369 : 14679 : break;
3370 : : }
3371 : : }
3372 : :
3373 : : /*
3374 : : * At this point, we have the right segment open and if we're streaming we
3375 : : * know the requested record is in it.
3376 : : */
788 heikki.linnakangas@i 3377 [ - + ]: 88513 : Assert(readFile != -1);
3378 : :
3379 : : /*
3380 : : * If the current segment is being streamed from the primary, calculate
3381 : : * how much of the current page we have received already. We know the
3382 : : * requested record has been received, but this is for the benefit of
3383 : : * future calls, to allow quick exit at the top of this function.
3384 : : */
3385 [ + + ]: 88513 : if (readSource == XLOG_FROM_STREAM)
3386 : : {
3387 [ + + ]: 69684 : if (((targetPagePtr) / XLOG_BLCKSZ) != (flushedUpto / XLOG_BLCKSZ))
3388 : 56739 : readLen = XLOG_BLCKSZ;
3389 : : else
3390 : 12945 : readLen = XLogSegmentOffset(flushedUpto, wal_segment_size) -
3391 : : targetPageOff;
3392 : : }
3393 : : else
3394 : 18829 : readLen = XLOG_BLCKSZ;
3395 : :
3396 : : /* Read the requested page */
3397 : 88513 : readOff = targetPageOff;
3398 : :
3399 : 88513 : pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
563 tmunro@postgresql.or 3400 : 88513 : r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff); /* a full page is always requested, even when readLen is smaller */
788 heikki.linnakangas@i 3401 [ - + ]: 88513 : if (r != XLOG_BLCKSZ)
3402 : : {
3403 : : char fname[MAXFNAMELEN];
788 heikki.linnakangas@i 3404 :UBC 0 : int save_errno = errno; /* kept so %m below reports the read failure */
3405 : :
3406 : 0 : pgstat_report_wait_end();
3407 : 0 : XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
3408 [ # # ]: 0 : if (r < 0)
3409 : : {
3410 : 0 : errno = save_errno;
3411 [ # # ]: 0 : ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
3412 : : (errcode_for_file_access(),
3413 : : errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: %m",
3414 : : fname, LSN_FORMAT_ARGS(targetPagePtr),
3415 : : readOff)));
3416 : : }
3417 : : else
3418 [ # # ]: 0 : ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
3419 : : (errcode(ERRCODE_DATA_CORRUPTED),
3420 : : errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: read %d of %zu",
3421 : : fname, LSN_FORMAT_ARGS(targetPagePtr),
3422 : : readOff, r, (Size) XLOG_BLCKSZ)));
3423 : 0 : goto next_record_is_invalid;
3424 : : }
788 heikki.linnakangas@i 3425 :CBC 88513 : pgstat_report_wait_end();
3426 : :
3427 [ - + ]: 88513 : Assert(targetSegNo == readSegNo);
3428 [ - + ]: 88513 : Assert(targetPageOff == readOff);
3429 [ - + ]: 88513 : Assert(reqLen <= readLen);
3430 : :
3431 : 88513 : xlogreader->seg.ws_tli = curFileTLI;
3432 : :
3433 : : /*
3434 : : * Check the page header immediately, so that we can retry immediately if
3435 : : * it's not valid. This may seem unnecessary, because ReadPageInternal()
3436 : : * validates the page header anyway, and would propagate the failure up to
3437 : : * ReadRecord(), which would retry. However, there's a corner case with
3438 : : * continuation records, if a record is split across two pages such that
3439 : : * we would need to read the two pages from different sources. For
3440 : : * example, imagine a scenario where a streaming replica is started up,
3441 : : * and replay reaches a record that's split across two WAL segments. The
3442 : : * first page is only available locally, in pg_wal, because it's already
3443 : : * been recycled on the primary. The second page, however, is not present
3444 : : * in pg_wal, and we should stream it from the primary. There is a
3445 : : * recycled WAL segment present in pg_wal, with garbage contents, however.
3446 : : * We would read the first page from the local WAL segment, but when
3447 : : * reading the second page, we would read the bogus, recycled, WAL
3448 : : * segment. If we didn't catch that case here, we would never recover,
3449 : : * because ReadRecord() would retry reading the whole record from the
3450 : : * beginning.
3451 : : *
3452 : : * Of course, this only catches errors in the page header, which is what
3453 : : * happens in the case of a recycled WAL segment. Other kinds of errors or
3454 : : * corruption still have the same problem. But this at least fixes the
3455 : : * common case, which can happen as part of normal operation.
3456 : : *
3457 : : * Validating the page header is cheap enough that doing it twice
3458 : : * shouldn't be a big deal from a performance point of view.
3459 : : *
3460 : : * When not in standby mode, an invalid page header should cause recovery
3461 : : * to end, not retry reading the page, so we don't need to validate the
3462 : : * page header here for the retry. Instead, ReadPageInternal() is
3463 : : * responsible for the validation.
3464 : : */
3465 [ + + ]: 88513 : if (StandbyMode &&
3466 [ + + ]: 78413 : !XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
3467 : : {
3468 : : /*
3469 : : * Emit this error right now then retry this page immediately. Use
3470 : : * errmsg_internal() because the message was already translated.
3471 : : */
3472 [ + - ]: 9 : if (xlogreader->errormsg_buf[0])
3473 [ + + ]: 9 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3474 : : (errmsg_internal("%s", xlogreader->errormsg_buf)));
3475 : :
3476 : : /* reset any error XLogReaderValidatePageHeader() might have set */
589 tmunro@postgresql.or 3477 : 9 : XLogReaderResetError(xlogreader);
788 heikki.linnakangas@i 3478 : 9 : goto next_record_is_invalid;
3479 : : }
3480 : :
3481 : 88504 : return readLen;
3482 : :
3483 : 9 : next_record_is_invalid:
3484 : :
3485 : : /*
3486 : : * If we're reading ahead, give up fast. Retries and error reporting will
3487 : : * be handled by a later read when recovery catches up to this point.
3488 : : */
589 tmunro@postgresql.or 3489 [ + + ]: 9 : if (xlogreader->nonblocking)
3490 : 3 : return XLREAD_WOULDBLOCK;
3491 : :
788 heikki.linnakangas@i 3492 : 6 : lastSourceFailed = true;
3493 : :
3494 [ + - ]: 6 : if (readFile >= 0)
3495 : 6 : close(readFile);
3496 : 6 : readFile = -1;
3497 : 6 : readLen = 0;
3498 : 6 : readSource = XLOG_FROM_ANY;
3499 : :
3500 : : /* In standby-mode, keep trying */
3501 [ + - ]: 6 : if (StandbyMode)
3502 : 6 : goto retry;
3503 : : else
738 tmunro@postgresql.or 3504 :UBC 0 : return XLREAD_FAIL;
3505 : : }
3506 : :
3507 : : /*
3508 : : * Open the WAL segment containing WAL location 'RecPtr'.
3509 : : *
3510 : : * The segment can be fetched via restore_command, or via walreceiver having
3511 : : * streamed the record, or it can already be present in pg_wal. Checking
3512 : : * pg_wal is mainly for crash recovery, but it will be polled in standby mode
3513 : : * too, in case someone copies a new segment directly to pg_wal. That is not
3514 : : * documented or recommended, though.
3515 : : *
3516 : : * If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should
3517 : : * prepare to read WAL starting from RedoStartLSN after this.
3518 : : *
3519 : : * 'RecPtr' might not point to the beginning of the record we're interested
3520 : : * in, it might also point to the page or segment header. In that case,
3521 : : * 'tliRecPtr' is the position of the WAL record we're interested in. It is
3522 : : * used to decide which timeline to stream the requested WAL from.
3523 : : *
3524 : : * 'replayLSN' is the current replay LSN, so that if we scan for new
3525 : : * timelines, we can reject a switch to a timeline that branched off before
3526 : : * this point.
3527 : : *
3528 : : * If the record is not immediately available, the function returns
3529 : : * XLREAD_FAIL if we're not in standby mode. In standby mode, waits for it
3530 : : * to become available.
3531 : : *
3532 : : * When the requested record becomes available, the function opens the file
3533 : : * containing it (if not open already), and returns XLREAD_SUCCESS. When end
3534 : : * of standby mode is triggered by the user, and there is no more WAL
3535 : : * available, returns XLREAD_FAIL.
3536 : : *
3537 : : * If nonblocking is true, then give up immediately if we can't satisfy the
3538 : : * request, returning XLREAD_WOULDBLOCK instead of waiting.
3539 : : */
3540 : : static XLogPageReadResult
788 heikki.linnakangas@i 3541 :CBC 14917 : WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
3542 : : bool fetching_ckpt, XLogRecPtr tliRecPtr,
3543 : : TimeLineID replayTLI, XLogRecPtr replayLSN,
3544 : : bool nonblocking)
3545 : : {
3546 : : static TimestampTz last_fail_time = 0;
3547 : : TimestampTz now;
3548 : 14917 : bool streaming_reply_sent = false;
3549 : :
3550 : : /*-------
3551 : : * Standby mode is implemented by a state machine:
3552 : : *
3553 : : * 1. Read from either archive or pg_wal (XLOG_FROM_ARCHIVE), or just
3554 : : * pg_wal (XLOG_FROM_PG_WAL)
3555 : : * 2. Check for promotion trigger request
3556 : : * 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
3557 : : * 4. Rescan timelines
3558 : : * 5. Sleep wal_retrieve_retry_interval milliseconds, and loop back to 1.
3559 : : *
3560 : : * Failure to read from the current source advances the state machine to
3561 : : * the next state.
3562 : : *
3563 : : * 'currentSource' indicates the current state. There are no currentSource
3564 : : * values for "check trigger", "rescan timelines", and "sleep" states,
3565 : : * those actions are taken when reading from the previous source fails, as
3566 : : * part of advancing to the next state.
3567 : : *
3568 : : * If standby mode is turned off while reading WAL from stream, we move
3569 : : * to XLOG_FROM_ARCHIVE and reset lastSourceFailed, to force fetching
3570 : : * the files (which would be required at end of recovery, e.g., timeline
3571 : : * history file) from archive or pg_wal. We don't need to kill WAL receiver
3572 : : * here because it's already stopped when standby mode is turned off at
3573 : : * the end of recovery.
3574 : : *-------
3575 : : */
3576 [ + + ]: 14917 : if (!InArchiveRecovery)
3577 : 793 : currentSource = XLOG_FROM_PG_WAL;
3578 [ + + ]: 14124 : else if (currentSource == XLOG_FROM_ANY ||
3579 [ + + - + ]: 13979 : (!StandbyMode && currentSource == XLOG_FROM_STREAM))
3580 : : {
3581 : 145 : lastSourceFailed = false;
3582 : 145 : currentSource = XLOG_FROM_ARCHIVE;
3583 : : }
3584 : :
3585 : : for (;;)
3586 : 13698 : {
3587 : 28615 : XLogSource oldSource = currentSource;
3588 : 28615 : bool startWalReceiver = false;
3589 : :
3590 : : /*
3591 : : * First check if we failed to read from the current source, and
3592 : : * advance the state machine if so. The failure to read might've
3593 : : * happened outside this function, e.g when a CRC check fails on a
3594 : : * record, or within this loop.
3595 : : */
3596 [ + + ]: 28615 : if (lastSourceFailed)
3597 : : {
3598 : : /*
3599 : : * Don't allow any retry loops to occur during nonblocking
3600 : : * readahead. Let the caller process everything that has been
3601 : : * decoded already first.
3602 : : */
738 tmunro@postgresql.or 3603 [ + + ]: 788 : if (nonblocking)
3604 : 83 : return XLREAD_WOULDBLOCK;
3605 : :
788 heikki.linnakangas@i 3606 [ + + - ]: 705 : switch (currentSource)
3607 : : {
3608 : 395 : case XLOG_FROM_ARCHIVE:
3609 : : case XLOG_FROM_PG_WAL:
3610 : :
3611 : : /*
3612 : : * Check to see if promotion is requested. Note that we do
3613 : : * this only after failure, so when you promote, we still
3614 : : * finish replaying as much as we can from archive and
3615 : : * pg_wal before failover.
3616 : : */
3617 [ + + + + ]: 395 : if (StandbyMode && CheckForStandbyTrigger())
3618 : : {
3619 : 20 : XLogShutdownWalRcv();
738 tmunro@postgresql.or 3620 : 20 : return XLREAD_FAIL;
3621 : : }
3622 : :
3623 : : /*
3624 : : * Not in standby mode, and we've now tried the archive
3625 : : * and pg_wal.
3626 : : */
788 heikki.linnakangas@i 3627 [ + + ]: 375 : if (!StandbyMode)
738 tmunro@postgresql.or 3628 : 18 : return XLREAD_FAIL;
3629 : :
3630 : : /*
3631 : : * Move to XLOG_FROM_STREAM state, and set to start a
3632 : : * walreceiver if necessary.
3633 : : */
788 heikki.linnakangas@i 3634 : 357 : currentSource = XLOG_FROM_STREAM;
3635 : 357 : startWalReceiver = true;
3636 : 357 : break;
3637 : :
3638 : 310 : case XLOG_FROM_STREAM:
3639 : :
3640 : : /*
3641 : : * Failure while streaming. Most likely, we got here
3642 : : * because streaming replication was terminated, or
3643 : : * promotion was triggered. But we also get here if we
3644 : : * find an invalid record in the WAL streamed from the
3645 : : * primary, in which case something is seriously wrong.
3646 : : * There's little chance that the problem will just go
3647 : : * away, but PANIC is not good for availability either,
3648 : : * especially in hot standby mode. So, we treat that the
3649 : : * same as disconnection, and retry from archive/pg_wal
3650 : : * again. The WAL in the archive should be identical to
3651 : : * what was streamed, so it's unlikely that it helps, but
3652 : : * one can hope...
3653 : : */
3654 : :
3655 : : /*
3656 : : * We should be able to move to XLOG_FROM_STREAM only in
3657 : : * standby mode.
3658 : : */
3659 [ - + ]: 310 : Assert(StandbyMode);
3660 : :
3661 : : /*
3662 : : * Before we leave XLOG_FROM_STREAM state, make sure that
3663 : : * walreceiver is not active, so that it won't overwrite
3664 : : * WAL that we restore from archive.
3665 : : */
577 noah@leadboat.com 3666 : 310 : XLogShutdownWalRcv();
3667 : :
3668 : : /*
3669 : : * Before we sleep, re-scan for possible new timelines if
3670 : : * we were requested to recover to the latest timeline.
3671 : : */
788 heikki.linnakangas@i 3672 [ + - ]: 310 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
3673 : : {
3674 [ + + ]: 310 : if (rescanLatestTimeLine(replayTLI, replayLSN))
3675 : : {
3676 : 11 : currentSource = XLOG_FROM_ARCHIVE;
3677 : 11 : break;
3678 : : }
3679 : : }
3680 : :
3681 : : /*
3682 : : * XLOG_FROM_STREAM is the last state in our state
3683 : : * machine, so we've exhausted all the options for
3684 : : * obtaining the requested WAL. We're going to loop back
3685 : : * and retry from the archive, but if it hasn't been long
3686 : : * since last attempt, sleep wal_retrieve_retry_interval
3687 : : * milliseconds to avoid busy-waiting.
3688 : : */
3689 : 299 : now = GetCurrentTimestamp();
3690 [ + + ]: 299 : if (!TimestampDifferenceExceeds(last_fail_time, now,
3691 : : wal_retrieve_retry_interval))
3692 : : {
3693 : : long wait_time;
3694 : :
3695 : 428 : wait_time = wal_retrieve_retry_interval -
3696 : 214 : TimestampDifferenceMilliseconds(last_fail_time, now);
3697 : :
3698 [ + - ]: 214 : elog(LOG, "waiting for WAL to become available at %X/%X",
3699 : : LSN_FORMAT_ARGS(RecPtr));
3700 : :
3701 : : /* Do background tasks that might benefit us later. */
502 tgl@sss.pgh.pa.us 3702 : 214 : KnownAssignedTransactionIdsIdleMaintenance();
3703 : :
788 heikki.linnakangas@i 3704 : 214 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
3705 : : WL_LATCH_SET | WL_TIMEOUT |
3706 : : WL_EXIT_ON_PM_DEATH,
3707 : : wait_time,
3708 : : WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL);
3709 : 187 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
3710 : 187 : now = GetCurrentTimestamp();
3711 : :
3712 : : /* Handle interrupt signals of startup process */
3713 : 187 : HandleStartupProcInterrupts();
3714 : : }
3715 : 261 : last_fail_time = now;
3716 : 261 : currentSource = XLOG_FROM_ARCHIVE;
3717 : 261 : break;
3718 : :
788 heikki.linnakangas@i 3719 :UBC 0 : default:
3720 [ # # ]: 0 : elog(ERROR, "unexpected WAL source %d", currentSource);
3721 : : }
3722 : : }
788 heikki.linnakangas@i 3723 [ + + ]:CBC 27827 : else if (currentSource == XLOG_FROM_PG_WAL)
3724 : : {
3725 : : /*
3726 : : * We just successfully read a file in pg_wal. We prefer files in
3727 : : * the archive over ones in pg_wal, so try the next file again
3728 : : * from the archive first.
3729 : : */
3730 [ - + ]: 791 : if (InArchiveRecovery)
788 heikki.linnakangas@i 3731 :UBC 0 : currentSource = XLOG_FROM_ARCHIVE;
3732 : : }
3733 : :
788 heikki.linnakangas@i 3734 [ + + ]:CBC 28456 : if (currentSource != oldSource)
3735 [ + + + - ]: 629 : elog(DEBUG2, "switched WAL source from %s to %s after %s",
3736 : : xlogSourceNames[oldSource], xlogSourceNames[currentSource],
3737 : : lastSourceFailed ? "failure" : "success");
3738 : :
3739 : : /*
3740 : : * We've now handled possible failure. Try to read from the chosen
3741 : : * source.
3742 : : */
3743 : 28456 : lastSourceFailed = false;
3744 : :
3745 [ + + - ]: 28456 : switch (currentSource)
3746 : : {
3747 : 1451 : case XLOG_FROM_ARCHIVE:
3748 : : case XLOG_FROM_PG_WAL:
3749 : :
3750 : : /*
3751 : : * WAL receiver must not be running when reading WAL from
3752 : : * archive or pg_wal.
3753 : : */
3754 [ - + ]: 1451 : Assert(!WalRcvStreaming());
3755 : :
3756 : : /* Close any old file we might have open. */
3757 [ + + ]: 1451 : if (readFile >= 0)
3758 : : {
3759 : 124 : close(readFile);
3760 : 124 : readFile = -1;
3761 : : }
3762 : : /* Reset curFileTLI if random fetch. */
3763 [ + + ]: 1451 : if (randAccess)
3764 : 968 : curFileTLI = 0;
3765 : :
3766 : : /*
3767 : : * Try to restore the file from archive, or read an existing
3768 : : * file from pg_wal.
3769 : : */
3770 : 1451 : readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,
3771 [ + + ]: 1451 : currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
3772 : : currentSource);
3773 [ + + ]: 1451 : if (readFile >= 0)
738 tmunro@postgresql.or 3774 : 1207 : return XLREAD_SUCCESS; /* success! */
3775 : :
3776 : : /*
3777 : : * Nope, not found in archive or pg_wal.
3778 : : */
788 heikki.linnakangas@i 3779 : 244 : lastSourceFailed = true;
3780 : 244 : break;
3781 : :
3782 : 27005 : case XLOG_FROM_STREAM:
3783 : : {
3784 : : bool havedata;
3785 : :
3786 : : /*
3787 : : * We should be able to move to XLOG_FROM_STREAM only in
3788 : : * standby mode.
3789 : : */
3790 [ - + ]: 27005 : Assert(StandbyMode);
3791 : :
3792 : : /*
3793 : : * First, shutdown walreceiver if its restart has been
3794 : : * requested -- but no point if we're already slated for
3795 : : * starting it.
3796 : : */
3797 [ + + + - ]: 27005 : if (pendingWalRcvRestart && !startWalReceiver)
3798 : : {
3799 : 4 : XLogShutdownWalRcv();
3800 : :
3801 : : /*
3802 : : * Re-scan for possible new timelines if we were
3803 : : * requested to recover to the latest timeline.
3804 : : */
3805 [ + - ]: 4 : if (recoveryTargetTimeLineGoal ==
3806 : : RECOVERY_TARGET_TIMELINE_LATEST)
3807 : 4 : rescanLatestTimeLine(replayTLI, replayLSN);
3808 : :
3809 : 4 : startWalReceiver = true;
3810 : : }
3811 : 27005 : pendingWalRcvRestart = false;
3812 : :
3813 : : /*
3814 : : * Launch walreceiver if needed.
3815 : : *
3816 : : * If fetching_ckpt is true, RecPtr points to the initial
3817 : : * checkpoint location. In that case, we use RedoStartLSN
3818 : : * as the streaming start position instead of RecPtr, so
3819 : : * that when we later jump backwards to start redo at
3820 : : * RedoStartLSN, we will have the logs streamed already.
3821 : : */
3822 [ + + + - ]: 27005 : if (startWalReceiver &&
3823 [ + + ]: 361 : PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
3824 : : {
3825 : : XLogRecPtr ptr;
3826 : : TimeLineID tli;
3827 : :
3828 [ + + ]: 321 : if (fetching_ckpt)
3829 : : {
3830 : 1 : ptr = RedoStartLSN;
3831 : 1 : tli = RedoStartTLI;
3832 : : }
3833 : : else
3834 : : {
3835 : 320 : ptr = RecPtr;
3836 : :
3837 : : /*
3838 : : * Use the record begin position to determine the
3839 : : * TLI, rather than the position we're reading.
3840 : : */
3841 : 320 : tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
3842 : :
3843 [ + - - + ]: 320 : if (curFileTLI > 0 && tli < curFileTLI)
788 heikki.linnakangas@i 3844 [ # # ]:UBC 0 : elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
3845 : : LSN_FORMAT_ARGS(tliRecPtr),
3846 : : tli, curFileTLI);
3847 : : }
788 heikki.linnakangas@i 3848 :CBC 321 : curFileTLI = tli;
3849 : 321 : SetInstallXLogFileSegmentActive();
3850 : 321 : RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
3851 : : PrimarySlotName,
3852 : : wal_receiver_create_temp_slot);
3853 : 321 : flushedUpto = 0;
3854 : : }
3855 : :
3856 : : /*
3857 : : * Check if WAL receiver is active or wait to start up.
3858 : : */
3859 [ + + ]: 27005 : if (!WalRcvStreaming())
3860 : : {
3861 : 285 : lastSourceFailed = true;
3862 : 285 : break;
3863 : : }
3864 : :
3865 : : /*
3866 : : * Walreceiver is active, so see if new data has arrived.
3867 : : *
3868 : : * We only advance XLogReceiptTime when we obtain fresh
3869 : : * WAL from walreceiver and observe that we had already
3870 : : * processed everything before the most recent "chunk"
3871 : : * that it flushed to disk. In steady state where we are
3872 : : * keeping up with the incoming data, XLogReceiptTime will
3873 : : * be updated on each cycle. When we are behind,
3874 : : * XLogReceiptTime will not advance, so the grace time
3875 : : * allotted to conflicting queries will decrease.
3876 : : */
3877 [ + + ]: 26720 : if (RecPtr < flushedUpto)
3878 : 248 : havedata = true;
3879 : : else
3880 : : {
3881 : : XLogRecPtr latestChunkStart;
3882 : :
3883 : 26472 : flushedUpto = GetWalRcvFlushRecPtr(&latestChunkStart, &receiveTLI);
3884 [ + + + - ]: 26472 : if (RecPtr < flushedUpto && receiveTLI == curFileTLI)
3885 : : {
3886 : 13457 : havedata = true;
3887 [ + + ]: 13457 : if (latestChunkStart <= RecPtr)
3888 : : {
3889 : 12333 : XLogReceiptTime = GetCurrentTimestamp();
3890 : 12333 : SetCurrentChunkStartTime(XLogReceiptTime);
3891 : : }
3892 : : }
3893 : : else
3894 : 13015 : havedata = false;
3895 : : }
3896 [ + + ]: 26720 : if (havedata)
3897 : : {
3898 : : /*
3899 : : * Great, streamed far enough. Open the file if it's
3900 : : * not open already. Also read the timeline history
3901 : : * file if we haven't initialized timeline history
3902 : : * yet; it should be streamed over and present in
3903 : : * pg_wal by now. Use XLOG_FROM_STREAM so that source
3904 : : * info is set correctly and XLogReceiptTime isn't
3905 : : * changed.
3906 : : *
3907 : : * NB: We must set readTimeLineHistory based on
3908 : : * recoveryTargetTLI, not receiveTLI. Normally they'll
3909 : : * be the same, but if recovery_target_timeline is
3910 : : * 'latest' and archiving is configured, then it's
3911 : : * possible that we managed to retrieve one or more
3912 : : * new timeline history files from the archive,
3913 : : * updating recoveryTargetTLI.
3914 : : */
3915 [ + + ]: 13705 : if (readFile < 0)
3916 : : {
3917 [ + + ]: 233 : if (!expectedTLEs)
3918 : 1 : expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
3919 : 233 : readFile = XLogFileRead(readSegNo, PANIC,
3920 : : receiveTLI,
3921 : : XLOG_FROM_STREAM, false);
3922 [ - + ]: 233 : Assert(readFile >= 0);
3923 : : }
3924 : : else
3925 : : {
3926 : : /* just make sure source info is correct... */
3927 : 13472 : readSource = XLOG_FROM_STREAM;
3928 : 13472 : XLogReceiptSource = XLOG_FROM_STREAM;
738 tmunro@postgresql.or 3929 : 13472 : return XLREAD_SUCCESS;
3930 : : }
788 heikki.linnakangas@i 3931 : 233 : break;
3932 : : }
3933 : :
3934 : : /* In nonblocking mode, return rather than sleeping. */
738 tmunro@postgresql.or 3935 [ + + ]: 13015 : if (nonblocking)
3936 : 34 : return XLREAD_WOULDBLOCK;
3937 : :
3938 : : /*
3939 : : * Data not here yet. Check for trigger, then wait for
3940 : : * walreceiver to wake us up when new WAL arrives.
3941 : : */
788 heikki.linnakangas@i 3942 [ + + ]: 12981 : if (CheckForStandbyTrigger())
3943 : : {
3944 : : /*
3945 : : * Note that we don't return XLREAD_FAIL immediately
3946 : : * here. After being triggered, we still want to
3947 : : * replay all the WAL that was already streamed. It's
3948 : : * in pg_wal now, so we just treat this as a failure,
3949 : : * and the state machine will move on to replay the
3950 : : * streamed WAL from pg_wal, and then recheck the
3951 : : * trigger and exit replay.
3952 : : */
3953 : 26 : lastSourceFailed = true;
3954 : 26 : break;
3955 : : }
3956 : :
3957 : : /*
3958 : : * Since we have replayed everything we have received so
3959 : : * far and are about to start waiting for more WAL, let's
3960 : : * tell the upstream server our replay location now so
3961 : : * that pg_stat_replication doesn't show stale
3962 : : * information.
3963 : : */
3964 [ + + ]: 12955 : if (!streaming_reply_sent)
3965 : : {
3966 : 11192 : WalRcvForceReply();
3967 : 11192 : streaming_reply_sent = true;
3968 : : }
3969 : :
3970 : : /* Do any background tasks that might benefit us later. */
502 tgl@sss.pgh.pa.us 3971 : 12955 : KnownAssignedTransactionIdsIdleMaintenance();
3972 : :
3973 : : /* Update pg_stat_recovery_prefetch before sleeping. */
738 tmunro@postgresql.or 3974 : 12955 : XLogPrefetcherComputeStats(xlogprefetcher);
3975 : :
3976 : : /*
3977 : : * Wait for more WAL to arrive, when we will be woken
3978 : : * immediately by the WAL receiver.
3979 : : */
788 heikki.linnakangas@i 3980 : 12955 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
3981 : : WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
3982 : : -1L,
3983 : : WAIT_EVENT_RECOVERY_WAL_STREAM);
3984 : 12946 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
3985 : 12946 : break;
3986 : : }
3987 : :
788 heikki.linnakangas@i 3988 :UBC 0 : default:
3989 [ # # ]: 0 : elog(ERROR, "unexpected WAL source %d", currentSource);
3990 : : }
3991 : :
3992 : : /*
3993 : : * Check for recovery pause here so that we can confirm more quickly
3994 : : * that a requested pause has actually taken effect.
3995 : : */
788 heikki.linnakangas@i 3996 [ + + ]:CBC 13734 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
3997 : : RECOVERY_NOT_PAUSED)
3998 : 2 : recoveryPausesHere(false);
3999 : :
4000 : : /*
4001 : : * This possibly-long loop needs to handle interrupts of startup
4002 : : * process.
4003 : : */
4004 : 13734 : HandleStartupProcInterrupts();
4005 : : }
4006 : :
4007 : : return XLREAD_FAIL; /* not reached */
4008 : : }
4009 : :
4010 : :
4011 : : /*
4012 : : * Determine what log level should be used to report a corrupt WAL record
4013 : : * in the current WAL page, previously read by XLogPageRead().
4014 : : *
4015 : : * 'emode' is the error mode that would be used to report a file-not-found
4016 : : * or legitimate end-of-WAL situation. Generally, we use it as-is, but if
4017 : : * we're retrying the exact same record that we've tried previously, only
4018 : : * complain the first time to keep the noise down. However, we only do when
4019 : : * reading from pg_wal, because we don't expect any invalid records in archive
4020 : : * or in records streamed from the primary. Files in the archive should be complete,
4021 : : * and we should never hit the end of WAL because we stop and wait for more WAL
4022 : : * to arrive before replaying it.
4023 : : *
4024 : : * NOTE: This function remembers the RecPtr value it was last called with,
4025 : : * to suppress repeated messages about the same record. Only call this when
4026 : : * you are about to ereport(), or you might cause a later message to be
4027 : : * erroneously suppressed.
4028 : : */
4029 : : static int
4030 : 345 : emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
4031 : : {
4032 : : static XLogRecPtr lastComplaint = 0;
4033 : :
4034 [ + - + - ]: 345 : if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
4035 : : {
4036 [ + + ]: 345 : if (RecPtr == lastComplaint)
4037 : 127 : emode = DEBUG1;
4038 : : else
4039 : 218 : lastComplaint = RecPtr;
4040 : : }
4041 : 345 : return emode;
4042 : : }
4043 : :
4044 : :
4045 : : /*
4046 : : * Subroutine to try to fetch and validate a prior checkpoint record.
4047 : : */
4048 : : static XLogRecord *
738 tmunro@postgresql.or 4049 : 823 : ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher, XLogRecPtr RecPtr,
4050 : : TimeLineID replayTLI)
4051 : : {
4052 : : XLogRecord *record;
4053 : : uint8 info;
4054 : :
788 heikki.linnakangas@i 4055 [ - + ]: 823 : Assert(xlogreader != NULL);
4056 : :
4057 [ - + ]: 823 : if (!XRecOffIsValid(RecPtr))
4058 : : {
634 fujii@postgresql.org 4059 [ # # ]:UBC 0 : ereport(LOG,
4060 : : (errmsg("invalid checkpoint location")));
788 heikki.linnakangas@i 4061 : 0 : return NULL;
4062 : : }
4063 : :
738 tmunro@postgresql.or 4064 :CBC 823 : XLogPrefetcherBeginRead(xlogprefetcher, RecPtr);
4065 : 823 : record = ReadRecord(xlogprefetcher, LOG, true, replayTLI);
4066 : :
788 heikki.linnakangas@i 4067 [ - + ]: 823 : if (record == NULL)
4068 : : {
634 fujii@postgresql.org 4069 [ # # ]:UBC 0 : ereport(LOG,
4070 : : (errmsg("invalid checkpoint record")));
788 heikki.linnakangas@i 4071 : 0 : return NULL;
4072 : : }
788 heikki.linnakangas@i 4073 [ - + ]:CBC 823 : if (record->xl_rmid != RM_XLOG_ID)
4074 : : {
634 fujii@postgresql.org 4075 [ # # ]:UBC 0 : ereport(LOG,
4076 : : (errmsg("invalid resource manager ID in checkpoint record")));
788 heikki.linnakangas@i 4077 : 0 : return NULL;
4078 : : }
788 heikki.linnakangas@i 4079 :CBC 823 : info = record->xl_info & ~XLR_INFO_MASK;
4080 [ + + - + ]: 823 : if (info != XLOG_CHECKPOINT_SHUTDOWN &&
4081 : : info != XLOG_CHECKPOINT_ONLINE)
4082 : : {
634 fujii@postgresql.org 4083 [ # # ]:UBC 0 : ereport(LOG,
4084 : : (errmsg("invalid xl_info in checkpoint record")));
788 heikki.linnakangas@i 4085 : 0 : return NULL;
4086 : : }
788 heikki.linnakangas@i 4087 [ - + ]:CBC 823 : if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
4088 : : {
634 fujii@postgresql.org 4089 [ # # ]:UBC 0 : ereport(LOG,
4090 : : (errmsg("invalid length of checkpoint record")));
788 heikki.linnakangas@i 4091 : 0 : return NULL;
4092 : : }
788 heikki.linnakangas@i 4093 :CBC 823 : return record;
4094 : : }
4095 : :
4096 : : /*
4097 : : * Scan for new timelines that might have appeared in the archive since we
4098 : : * started recovery.
4099 : : *
4100 : : * If there are any, the function changes recovery target TLI to the latest
4101 : : * one and returns 'true'.
4102 : : */
4103 : : static bool
4104 : 314 : rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
4105 : : {
4106 : : List *newExpectedTLEs;
4107 : : bool found;
4108 : : ListCell *cell;
4109 : : TimeLineID newtarget;
4110 : 314 : TimeLineID oldtarget = recoveryTargetTLI;
4111 : 314 : TimeLineHistoryEntry *currentTle = NULL;
4112 : :
4113 : 314 : newtarget = findNewestTimeLine(recoveryTargetTLI);
4114 [ + + ]: 314 : if (newtarget == recoveryTargetTLI)
4115 : : {
4116 : : /* No new timelines found */
4117 : 303 : return false;
4118 : : }
4119 : :
4120 : : /*
4121 : : * Determine the list of expected TLIs for the new TLI
4122 : : */
4123 : :
4124 : 11 : newExpectedTLEs = readTimeLineHistory(newtarget);
4125 : :
4126 : : /*
4127 : : * If the current timeline is not part of the history of the new timeline,
4128 : : * we cannot proceed to it.
4129 : : */
4130 : 11 : found = false;
4131 [ + - + - : 22 : foreach(cell, newExpectedTLEs)
+ - ]
4132 : : {
4133 : 22 : currentTle = (TimeLineHistoryEntry *) lfirst(cell);
4134 : :
4135 [ + + ]: 22 : if (currentTle->tli == recoveryTargetTLI)
4136 : : {
4137 : 11 : found = true;
4138 : 11 : break;
4139 : : }
4140 : : }
4141 [ - + ]: 11 : if (!found)
4142 : : {
788 heikki.linnakangas@i 4143 [ # # ]:UBC 0 : ereport(LOG,
4144 : : (errmsg("new timeline %u is not a child of database system timeline %u",
4145 : : newtarget,
4146 : : replayTLI)));
4147 : 0 : return false;
4148 : : }
4149 : :
4150 : : /*
4151 : : * The current timeline was found in the history file, but check that the
4152 : : * next timeline was forked off from it *after* the current recovery
4153 : : * location.
4154 : : */
788 heikki.linnakangas@i 4155 [ - + ]:CBC 11 : if (currentTle->end < replayLSN)
4156 : : {
788 heikki.linnakangas@i 4157 [ # # ]:UBC 0 : ereport(LOG,
4158 : : (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
4159 : : newtarget,
4160 : : replayTLI,
4161 : : LSN_FORMAT_ARGS(replayLSN))));
4162 : 0 : return false;
4163 : : }
4164 : :
4165 : : /* The new timeline history seems valid. Switch target */
788 heikki.linnakangas@i 4166 :CBC 11 : recoveryTargetTLI = newtarget;
4167 : 11 : list_free_deep(expectedTLEs);
4168 : 11 : expectedTLEs = newExpectedTLEs;
4169 : :
4170 : : /*
4171 : : * As in StartupXLOG(), try to ensure we have all the history files
4172 : : * between the old target and new target in pg_wal.
4173 : : */
4174 : 11 : restoreTimeLineHistoryFiles(oldtarget + 1, newtarget);
4175 : :
4176 [ + - ]: 11 : ereport(LOG,
4177 : : (errmsg("new target timeline is %u",
4178 : : recoveryTargetTLI)));
4179 : :
4180 : 11 : return true;
4181 : : }
4182 : :
4183 : :
4184 : : /*
4185 : : * Open a logfile segment for reading (during recovery).
4186 : : *
4187 : : * If source == XLOG_FROM_ARCHIVE, the segment is retrieved from archive.
4188 : : * Otherwise, it's assumed to be already available in pg_wal.
4189 : : */
4190 : : static int
4191 : 2321 : XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
4192 : : XLogSource source, bool notfoundOk)
4193 : : {
4194 : : char xlogfname[MAXFNAMELEN];
4195 : : char activitymsg[MAXFNAMELEN + 16];
4196 : : char path[MAXPGPATH];
4197 : : int fd;
4198 : :
4199 : 2321 : XLogFileName(xlogfname, tli, segno, wal_segment_size);
4200 : :
4201 [ + + - ]: 2321 : switch (source)
4202 : : {
4203 : 674 : case XLOG_FROM_ARCHIVE:
4204 : : /* Report recovery progress in PS display */
4205 : 674 : snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
4206 : : xlogfname);
4207 : 674 : set_ps_display(activitymsg);
4208 : :
4209 [ + + ]: 674 : if (!RestoreArchivedFile(path, xlogfname,
4210 : : "RECOVERYXLOG",
4211 : : wal_segment_size,
4212 : : InRedo))
4213 : 623 : return -1;
4214 : 51 : break;
4215 : :
4216 : 1647 : case XLOG_FROM_PG_WAL:
4217 : : case XLOG_FROM_STREAM:
4218 : 1647 : XLogFilePath(path, tli, segno, wal_segment_size);
4219 : 1647 : break;
4220 : :
788 heikki.linnakangas@i 4221 :UBC 0 : default:
4222 [ # # ]: 0 : elog(ERROR, "invalid XLogFileRead source %d", source);
4223 : : }
4224 : :
4225 : : /*
4226 : : * If the segment was fetched from archival storage, replace the existing
4227 : : * xlog segment (if any) with the archival version.
4228 : : */
788 heikki.linnakangas@i 4229 [ + + ]:CBC 1698 : if (source == XLOG_FROM_ARCHIVE)
4230 : : {
4231 [ - + ]: 51 : Assert(!IsInstallXLogFileSegmentActive());
4232 : 51 : KeepFileRestoredFromArchive(path, xlogfname);
4233 : :
4234 : : /*
4235 : : * Set path to point at the new file in pg_wal.
4236 : : */
4237 : 51 : snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
4238 : : }
4239 : :
4240 : 1698 : fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
4241 [ + + ]: 1698 : if (fd >= 0)
4242 : : {
4243 : : /* Success! */
4244 : 1440 : curFileTLI = tli;
4245 : :
4246 : : /* Report recovery progress in PS display */
4247 : 1440 : snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
4248 : : xlogfname);
4249 : 1440 : set_ps_display(activitymsg);
4250 : :
4251 : : /* Track source of data in assorted state variables */
4252 : 1440 : readSource = source;
4253 : 1440 : XLogReceiptSource = source;
4254 : : /* In FROM_STREAM case, caller tracks receipt time, not me */
4255 [ + + ]: 1440 : if (source != XLOG_FROM_STREAM)
4256 : 1207 : XLogReceiptTime = GetCurrentTimestamp();
4257 : :
4258 : 1440 : return fd;
4259 : : }
4260 [ + - - + ]: 258 : if (errno != ENOENT || !notfoundOk) /* unexpected failure? */
788 heikki.linnakangas@i 4261 [ # # ]:UBC 0 : ereport(PANIC,
4262 : : (errcode_for_file_access(),
4263 : : errmsg("could not open file \"%s\": %m", path)));
788 heikki.linnakangas@i 4264 :CBC 258 : return -1;
4265 : : }
4266 : :
4267 : : /*
4268 : : * Open a logfile segment for reading (during recovery).
4269 : : *
4270 : : * This version searches for the segment with any TLI listed in expectedTLEs.
4271 : : */
4272 : : static int
4273 : 1451 : XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source)
4274 : : {
4275 : : char path[MAXPGPATH];
4276 : : ListCell *cell;
4277 : : int fd;
4278 : : List *tles;
4279 : :
4280 : : /*
4281 : : * Loop looking for a suitable timeline ID: we might need to read any of
4282 : : * the timelines listed in expectedTLEs.
4283 : : *
4284 : : * We expect curFileTLI on entry to be the TLI of the preceding file in
4285 : : * sequence, or 0 if there was no predecessor. We do not allow curFileTLI
4286 : : * to go backwards; this prevents us from picking up the wrong file when a
4287 : : * parent timeline extends to higher segment numbers than the child we
4288 : : * want to read.
4289 : : *
4290 : : * If we haven't read the timeline history file yet, read it now, so that
4291 : : * we know which TLIs to scan. We don't save the list in expectedTLEs,
4292 : : * however, unless we actually find a valid segment. That way if there is
4293 : : * neither a timeline history file nor a WAL segment in the archive, and
4294 : : * streaming replication is set up, we'll read the timeline history file
4295 : : * streamed from the primary when we start streaming, instead of
4296 : : * recovering with a dummy history generated here.
4297 : : */
4298 [ + + ]: 1451 : if (expectedTLEs)
4299 : 628 : tles = expectedTLEs;
4300 : : else
4301 : 823 : tles = readTimeLineHistory(recoveryTargetTLI);
4302 : :
4303 [ + - + + : 1722 : foreach(cell, tles)
+ + ]
4304 : : {
4305 : 1480 : TimeLineHistoryEntry *hent = (TimeLineHistoryEntry *) lfirst(cell);
4306 : 1480 : TimeLineID tli = hent->tli;
4307 : :
4308 [ + + ]: 1480 : if (tli < curFileTLI)
4309 : 2 : break; /* don't bother looking at too-old TLIs */
4310 : :
4311 : : /*
4312 : : * Skip scanning the timeline ID that the logfile segment to read
4313 : : * doesn't belong to
4314 : : */
4315 [ + + ]: 1478 : if (hent->begin != InvalidXLogRecPtr)
4316 : : {
4317 : 108 : XLogSegNo beginseg = 0;
4318 : :
4319 : 108 : XLByteToSeg(hent->begin, beginseg, wal_segment_size);
4320 : :
4321 : : /*
4322 : : * The logfile segment that doesn't belong to the timeline is
4323 : : * older or newer than the segment that the timeline started or
4324 : : * ended at, respectively. It's sufficient to check only the
4325 : : * starting segment of the timeline here. Since the timelines are
4326 : : * scanned in descending order in this loop, any segments newer
4327 : : * than the ending segment should belong to newer timeline and
4328 : : * have already been read before. So it's not necessary to check
4329 : : * the ending segment of the timeline here.
4330 : : */
4331 [ + + ]: 108 : if (segno < beginseg)
4332 : 13 : continue;
4333 : : }
4334 : :
4335 [ + + - + ]: 1465 : if (source == XLOG_FROM_ANY || source == XLOG_FROM_ARCHIVE)
4336 : : {
4337 : 674 : fd = XLogFileRead(segno, emode, tli,
4338 : : XLOG_FROM_ARCHIVE, true);
4339 [ + + ]: 674 : if (fd != -1)
4340 : : {
4341 [ - + ]: 51 : elog(DEBUG1, "got WAL segment from archive");
4342 [ + + ]: 51 : if (!expectedTLEs)
4343 : 21 : expectedTLEs = tles;
4344 : 1207 : return fd;
4345 : : }
4346 : : }
4347 : :
4348 [ + + + - ]: 1414 : if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_WAL)
4349 : : {
4350 : 1414 : fd = XLogFileRead(segno, emode, tli,
4351 : : XLOG_FROM_PG_WAL, true);
4352 [ + + ]: 1414 : if (fd != -1)
4353 : : {
4354 [ + + ]: 1156 : if (!expectedTLEs)
4355 : 801 : expectedTLEs = tles;
4356 : 1156 : return fd;
4357 : : }
4358 : : }
4359 : : }
4360 : :
4361 : : /* Couldn't find it. For simplicity, complain about front timeline */
4362 : 244 : XLogFilePath(path, recoveryTargetTLI, segno, wal_segment_size);
4363 : 244 : errno = ENOENT;
4364 [ + + ]: 244 : ereport(emode,
4365 : : (errcode_for_file_access(),
4366 : : errmsg("could not open file \"%s\": %m", path)));
4367 : 244 : return -1;
4368 : : }
4369 : :
4370 : : /*
4371 : : * Set flag to signal the walreceiver to restart. (The startup process calls
4372 : : * this on noticing a relevant configuration change.)
4373 : : */
4374 : : void
4375 : 5 : StartupRequestWalReceiverRestart(void)
4376 : : {
4377 [ + - + + ]: 5 : if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
4378 : : {
4379 [ + - ]: 4 : ereport(LOG,
4380 : : (errmsg("WAL receiver process shutdown requested")));
4381 : :
4382 : 4 : pendingWalRcvRestart = true;
4383 : : }
4384 : 5 : }
4385 : :
4386 : :
4387 : : /*
4388 : : * Has a standby promotion already been triggered?
4389 : : *
4390 : : * Unlike CheckForStandbyTrigger(), this works in any process
4391 : : * that's connected to shared memory.
4392 : : */
4393 : : bool
4394 : 49 : PromoteIsTriggered(void)
4395 : : {
4396 : : /*
4397 : : * We check shared state each time only until a standby promotion is
4398 : : * triggered. We can't trigger a promotion again, so there's no need to
4399 : : * keep checking after the shared variable has once been seen true.
4400 : : */
4401 [ + + ]: 49 : if (LocalPromoteIsTriggered)
4402 : 39 : return true;
4403 : :
4404 [ - + ]: 10 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4405 : 10 : LocalPromoteIsTriggered = XLogRecoveryCtl->SharedPromoteIsTriggered;
4406 : 10 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4407 : :
4408 : 10 : return LocalPromoteIsTriggered;
4409 : : }
4410 : :
4411 : : static void
4412 : 39 : SetPromoteIsTriggered(void)
4413 : : {
4414 [ - + ]: 39 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4415 : 39 : XLogRecoveryCtl->SharedPromoteIsTriggered = true;
4416 : 39 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4417 : :
4418 : : /*
4419 : : * Mark the recovery pause state as 'not paused' because the paused state
4420 : : * ends and promotion continues if a promotion is triggered while recovery
4421 : : * is paused. Otherwise pg_get_wal_replay_pause_state() can mistakenly
4422 : : * return 'paused' while a promotion is ongoing.
4423 : : */
4424 : 39 : SetRecoveryPause(false);
4425 : :
4426 : 39 : LocalPromoteIsTriggered = true;
4427 : 39 : }
4428 : :
4429 : : /*
4430 : : * Check whether a promote request has arrived.
4431 : : */
4432 : : static bool
4433 : 13635 : CheckForStandbyTrigger(void)
4434 : : {
4435 [ + + ]: 13635 : if (LocalPromoteIsTriggered)
4436 : 47 : return true;
4437 : :
4438 [ + + + - ]: 13588 : if (IsPromoteSignaled() && CheckPromoteSignal())
4439 : : {
4440 [ + - ]: 39 : ereport(LOG, (errmsg("received promote request")));
4441 : 39 : RemovePromoteSignalFiles();
4442 : 39 : ResetPromoteSignaled();
4443 : 39 : SetPromoteIsTriggered();
4444 : 39 : return true;
4445 : : }
4446 : :
4447 : 13549 : return false;
4448 : : }
4449 : :
4450 : : /*
4451 : : * Remove the files signaling a standby promotion request.
4452 : : */
4453 : : void
4454 : 767 : RemovePromoteSignalFiles(void)
4455 : : {
4456 : 767 : unlink(PROMOTE_SIGNAL_FILE);
4457 : 767 : }
4458 : :
4459 : : /*
4460 : : * Check to see if a promote request has arrived.
4461 : : */
4462 : : bool
4463 : 740 : CheckPromoteSignal(void)
4464 : : {
4465 : : struct stat stat_buf;
4466 : :
4467 [ + + ]: 740 : if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
4468 : 78 : return true;
4469 : :
4470 : 662 : return false;
4471 : : }
4472 : :
4473 : : /*
4474 : : * Wake up startup process to replay newly arrived WAL, or to notice that
4475 : : * failover has been requested.
4476 : : */
4477 : : void
4478 : 22344 : WakeupRecovery(void)
4479 : : {
4480 : 22344 : SetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
4481 : 22344 : }
4482 : :
4483 : : /*
4484 : : * Schedule a walreceiver wakeup in the main recovery loop.
4485 : : */
4486 : : void
788 heikki.linnakangas@i 4487 :UBC 0 : XLogRequestWalReceiverReply(void)
4488 : : {
4489 : 0 : doRequestWalReceiverReply = true;
4490 : 0 : }
4491 : :
4492 : : /*
4493 : : * Is HotStandby active yet? This is only important in special backends
4494 : : * since normal backends won't ever be able to connect until this returns
4495 : : * true. Postmaster knows this by way of signal, not via shared memory.
4496 : : *
4497 : : * Unlike testing standbyState, this works in any process that's connected to
4498 : : * shared memory. (And note that standbyState alone doesn't tell the truth
4499 : : * anyway.)
4500 : : */
4501 : : bool
788 heikki.linnakangas@i 4502 :CBC 162 : HotStandbyActive(void)
4503 : : {
4504 : : /*
4505 : : * We check shared state each time only until Hot Standby is active. We
4506 : : * can't de-activate Hot Standby, so there's no need to keep checking
4507 : : * after the shared variable has once been seen true.
4508 : : */
4509 [ + + ]: 162 : if (LocalHotStandbyActive)
4510 : 25 : return true;
4511 : : else
4512 : : {
4513 : : /* spinlock is essential on machines with weak memory ordering! */
4514 [ - + ]: 137 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4515 : 137 : LocalHotStandbyActive = XLogRecoveryCtl->SharedHotStandbyActive;
4516 : 137 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4517 : :
4518 : 137 : return LocalHotStandbyActive;
4519 : : }
4520 : : }
4521 : :
4522 : : /*
4523 : : * Like HotStandbyActive(), but to be used only in WAL replay code,
4524 : : * where we don't need to ask any other process what the state is.
4525 : : */
4526 : : static bool
788 heikki.linnakangas@i 4527 :UBC 0 : HotStandbyActiveInReplay(void)
4528 : : {
4529 [ # # # # ]: 0 : Assert(AmStartupProcess() || !IsPostmasterEnvironment);
4530 : 0 : return LocalHotStandbyActive;
4531 : : }
4532 : :
4533 : : /*
4534 : : * Get latest redo apply position.
4535 : : *
4536 : : * Exported to allow WALReceiver to read the pointer directly.
4537 : : */
4538 : : XLogRecPtr
788 heikki.linnakangas@i 4539 :CBC 58867 : GetXLogReplayRecPtr(TimeLineID *replayTLI)
4540 : : {
4541 : : XLogRecPtr recptr;
4542 : : TimeLineID tli;
4543 : :
4544 [ + + ]: 58867 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4545 : 58867 : recptr = XLogRecoveryCtl->lastReplayedEndRecPtr;
4546 : 58867 : tli = XLogRecoveryCtl->lastReplayedTLI;
4547 : 58867 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4548 : :
4549 [ + + ]: 58867 : if (replayTLI)
4550 : 2212 : *replayTLI = tli;
4551 : 58867 : return recptr;
4552 : : }
4553 : :
4554 : :
4555 : : /*
4556 : : * Get position of last applied, or the record being applied.
4557 : : *
4558 : : * This is different from GetXLogReplayRecPtr() in that if a WAL
4559 : : * record is currently being applied, this includes that record.
4560 : : */
4561 : : XLogRecPtr
4562 : 7488 : GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)
4563 : : {
4564 : : XLogRecPtr recptr;
4565 : : TimeLineID tli;
4566 : :
4567 [ + + ]: 7488 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4568 : 7488 : recptr = XLogRecoveryCtl->replayEndRecPtr;
4569 : 7488 : tli = XLogRecoveryCtl->replayEndTLI;
4570 : 7488 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4571 : :
4572 [ + - ]: 7488 : if (replayEndTLI)
4573 : 7488 : *replayEndTLI = tli;
4574 : 7488 : return recptr;
4575 : : }
4576 : :
4577 : : /*
4578 : : * Save timestamp of latest processed commit/abort record.
4579 : : *
4580 : : * We keep this in XLogRecoveryCtl, not a simple static variable, so that it can be
4581 : : * seen by processes other than the startup process. Note in particular
4582 : : * that CreateRestartPoint is executed in the checkpointer.
4583 : : */
4584 : : static void
4585 : 20220 : SetLatestXTime(TimestampTz xtime)
4586 : : {
4587 [ + + ]: 20220 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4588 : 20220 : XLogRecoveryCtl->recoveryLastXTime = xtime;
4589 : 20220 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4590 : 20220 : }
4591 : :
4592 : : /*
4593 : : * Fetch timestamp of latest processed commit/abort record.
4594 : : */
4595 : : TimestampTz
4596 : 188 : GetLatestXTime(void)
4597 : : {
4598 : : TimestampTz xtime;
4599 : :
4600 [ - + ]: 188 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4601 : 188 : xtime = XLogRecoveryCtl->recoveryLastXTime;
4602 : 188 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4603 : :
4604 : 188 : return xtime;
4605 : : }
4606 : :
4607 : : /*
4608 : : * Save timestamp of the next chunk of WAL records to apply.
4609 : : *
4610 : : * We keep this in XLogRecoveryCtl, not a simple static variable, so that it can be
4611 : : * seen by all backends.
4612 : : */
4613 : : static void
4614 : 12333 : SetCurrentChunkStartTime(TimestampTz xtime)
4615 : : {
4616 [ - + ]: 12333 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4617 : 12333 : XLogRecoveryCtl->currentChunkStartTime = xtime;
4618 : 12333 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4619 : 12333 : }
4620 : :
4621 : : /*
4622 : : * Fetch timestamp of latest processed commit/abort record.
4623 : : * Startup process maintains an accurate local copy in XLogReceiptTime
4624 : : */
4625 : : TimestampTz
4626 : 344 : GetCurrentChunkReplayStartTime(void)
4627 : : {
4628 : : TimestampTz xtime;
4629 : :
4630 [ + + ]: 344 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4631 : 344 : xtime = XLogRecoveryCtl->currentChunkStartTime;
4632 : 344 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4633 : :
4634 : 344 : return xtime;
4635 : : }
4636 : :
4637 : : /*
4638 : : * Returns time of receipt of current chunk of XLOG data, as well as
4639 : : * whether it was received from streaming replication or from archives.
4640 : : */
4641 : : void
788 heikki.linnakangas@i 4642 :GBC 30 : GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
4643 : : {
4644 : : /*
4645 : : * This must be executed in the startup process, since we don't export the
4646 : : * relevant state to shared memory.
4647 : : */
4648 [ - + ]: 30 : Assert(InRecovery);
4649 : :
4650 : 30 : *rtime = XLogReceiptTime;
4651 : 30 : *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
4652 : 30 : }
4653 : :
4654 : : /*
4655 : : * Note that text field supplied is a parameter name and does not require
4656 : : * translation
4657 : : */
4658 : : void
788 heikki.linnakangas@i 4659 :CBC 765 : RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)
4660 : : {
4661 [ - + ]: 765 : if (currValue < minValue)
4662 : : {
788 heikki.linnakangas@i 4663 [ # # ]:UBC 0 : if (HotStandbyActiveInReplay())
4664 : : {
4665 : 0 : bool warned_for_promote = false;
4666 : :
4667 [ # # ]: 0 : ereport(WARNING,
4668 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4669 : : errmsg("hot standby is not possible because of insufficient parameter settings"),
4670 : : errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
4671 : : param_name,
4672 : : currValue,
4673 : : minValue)));
4674 : :
4675 : 0 : SetRecoveryPause(true);
4676 : :
4677 [ # # ]: 0 : ereport(LOG,
4678 : : (errmsg("recovery has paused"),
4679 : : errdetail("If recovery is unpaused, the server will shut down."),
4680 : : errhint("You can then restart the server after making the necessary configuration changes.")));
4681 : :
4682 [ # # ]: 0 : while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
4683 : : {
4684 : 0 : HandleStartupProcInterrupts();
4685 : :
4686 [ # # ]: 0 : if (CheckForStandbyTrigger())
4687 : : {
4688 [ # # ]: 0 : if (!warned_for_promote)
4689 [ # # ]: 0 : ereport(WARNING,
4690 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4691 : : errmsg("promotion is not possible because of insufficient parameter settings"),
4692 : :
4693 : : /*
4694 : : * Repeat the detail from above so it's easy to find
4695 : : * in the log.
4696 : : */
4697 : : errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
4698 : : param_name,
4699 : : currValue,
4700 : : minValue),
4701 : : errhint("Restart the server after making the necessary configuration changes.")));
4702 : 0 : warned_for_promote = true;
4703 : : }
4704 : :
4705 : : /*
4706 : : * If recovery pause is requested then set it paused. While
4707 : : * we are in the loop, user might resume and pause again so
4708 : : * set this every time.
4709 : : */
4710 : 0 : ConfirmRecoveryPaused();
4711 : :
4712 : : /*
4713 : : * We wait on a condition variable that will wake us as soon
4714 : : * as the pause ends, but we use a timeout so we can check the
4715 : : * above conditions periodically too.
4716 : : */
4717 : 0 : ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
4718 : : WAIT_EVENT_RECOVERY_PAUSE);
4719 : : }
4720 : 0 : ConditionVariableCancelSleep();
4721 : : }
4722 : :
4723 [ # # ]: 0 : ereport(FATAL,
4724 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4725 : : errmsg("recovery aborted because of insufficient parameter settings"),
4726 : : /* Repeat the detail from above so it's easy to find in the log. */
4727 : : errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
4728 : : param_name,
4729 : : currValue,
4730 : : minValue),
4731 : : errhint("You can restart the server after making the necessary configuration changes.")));
4732 : : }
788 heikki.linnakangas@i 4733 :CBC 765 : }
4734 : :
4735 : :
4736 : : /*
4737 : : * GUC check_hook for primary_slot_name
4738 : : */
4739 : : bool
579 tgl@sss.pgh.pa.us 4740 : 1088 : check_primary_slot_name(char **newval, void **extra, GucSource source)
4741 : : {
4742 [ + - + + ]: 1088 : if (*newval && strcmp(*newval, "") != 0 &&
4743 [ - + ]: 155 : !ReplicationSlotValidateName(*newval, WARNING))
579 tgl@sss.pgh.pa.us 4744 :UBC 0 : return false;
4745 : :
579 tgl@sss.pgh.pa.us 4746 :CBC 1088 : return true;
4747 : : }
4748 : :
4749 : : /*
4750 : : * Recovery target settings: Only one of the several recovery_target* settings
4751 : : * may be set. Setting a second one results in an error. The global variable
4752 : : * recoveryTarget tracks which kind of recovery target was chosen. Other
4753 : : * variables store the actual target value (for example a string or a xid).
4754 : : * The assign functions of the parameters check whether a competing parameter
4755 : : * was already set. But we want to allow setting the same parameter multiple
4756 : : * times. We also want to allow unsetting a parameter and setting a different
4757 : : * one, so we unset recoveryTarget when the parameter is set to an empty
4758 : : * string.
4759 : : *
4760 : : * XXX this code is broken by design. Throwing an error from a GUC assign
4761 : : * hook breaks fundamental assumptions of guc.c. So long as all the variables
4762 : : * for which this can happen are PGC_POSTMASTER, the consequences are limited,
4763 : : * since we'd just abort postmaster startup anyway. Nonetheless it's likely
4764 : : * that we have odd behaviors such as unexpected GUC ordering dependencies.
4765 : : */
4766 : :
4767 : : static void
4768 : : pg_attribute_noreturn()
4769 : 1 : error_multiple_recovery_targets(void)
4770 : : {
4771 [ + - ]: 1 : ereport(ERROR,
4772 : : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4773 : : errmsg("multiple recovery targets specified"),
4774 : : errdetail("At most one of recovery_target, recovery_target_lsn, recovery_target_name, recovery_target_time, recovery_target_xid may be set.")));
4775 : : }
4776 : :
4777 : : /*
4778 : : * GUC check_hook for recovery_target
4779 : : */
4780 : : bool
4781 : 931 : check_recovery_target(char **newval, void **extra, GucSource source)
4782 : : {
4783 [ + + - + ]: 931 : if (strcmp(*newval, "immediate") != 0 && strcmp(*newval, "") != 0)
4784 : : {
579 tgl@sss.pgh.pa.us 4785 :UBC 0 : GUC_check_errdetail("The only allowed value is \"immediate\".");
4786 : 0 : return false;
4787 : : }
579 tgl@sss.pgh.pa.us 4788 :CBC 931 : return true;
4789 : : }
4790 : :
4791 : : /*
4792 : : * GUC assign_hook for recovery_target
4793 : : */
4794 : : void
4795 : 931 : assign_recovery_target(const char *newval, void *extra)
4796 : : {
4797 [ - + ]: 931 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
579 tgl@sss.pgh.pa.us 4798 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_IMMEDIATE)
4799 : 0 : error_multiple_recovery_targets();
4800 : :
579 tgl@sss.pgh.pa.us 4801 [ + - + + ]:CBC 931 : if (newval && strcmp(newval, "") != 0)
4802 : 1 : recoveryTarget = RECOVERY_TARGET_IMMEDIATE;
4803 : : else
4804 : 930 : recoveryTarget = RECOVERY_TARGET_UNSET;
4805 : 931 : }
4806 : :
4807 : : /*
4808 : : * GUC check_hook for recovery_target_lsn
4809 : : */
4810 : : bool
4811 : 934 : check_recovery_target_lsn(char **newval, void **extra, GucSource source)
4812 : : {
4813 [ + + ]: 934 : if (strcmp(*newval, "") != 0)
4814 : : {
4815 : : XLogRecPtr lsn;
4816 : : XLogRecPtr *myextra;
4817 : 6 : bool have_error = false;
4818 : :
4819 : 6 : lsn = pg_lsn_in_internal(*newval, &have_error);
4820 [ - + ]: 6 : if (have_error)
579 tgl@sss.pgh.pa.us 4821 :UBC 0 : return false;
4822 : :
579 tgl@sss.pgh.pa.us 4823 :CBC 6 : myextra = (XLogRecPtr *) guc_malloc(ERROR, sizeof(XLogRecPtr));
4824 : 6 : *myextra = lsn;
4825 : 6 : *extra = (void *) myextra;
4826 : : }
4827 : 934 : return true;
4828 : : }
4829 : :
4830 : : /*
4831 : : * GUC assign_hook for recovery_target_lsn
4832 : : */
4833 : : void
4834 : 934 : assign_recovery_target_lsn(const char *newval, void *extra)
4835 : : {
4836 [ - + ]: 934 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
579 tgl@sss.pgh.pa.us 4837 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_LSN)
4838 : 0 : error_multiple_recovery_targets();
4839 : :
579 tgl@sss.pgh.pa.us 4840 [ + - + + ]:CBC 934 : if (newval && strcmp(newval, "") != 0)
4841 : : {
4842 : 6 : recoveryTarget = RECOVERY_TARGET_LSN;
4843 : 6 : recoveryTargetLSN = *((XLogRecPtr *) extra);
4844 : : }
4845 : : else
4846 : 928 : recoveryTarget = RECOVERY_TARGET_UNSET;
4847 : 934 : }
4848 : :
4849 : : /*
4850 : : * GUC check_hook for recovery_target_name
4851 : : */
4852 : : bool
4853 : 937 : check_recovery_target_name(char **newval, void **extra, GucSource source)
4854 : : {
4855 : : /* Use the value of newval directly */
4856 [ - + ]: 937 : if (strlen(*newval) >= MAXFNAMELEN)
4857 : : {
579 tgl@sss.pgh.pa.us 4858 :UBC 0 : GUC_check_errdetail("%s is too long (maximum %d characters).",
4859 : : "recovery_target_name", MAXFNAMELEN - 1);
4860 : 0 : return false;
4861 : : }
579 tgl@sss.pgh.pa.us 4862 :CBC 937 : return true;
4863 : : }
4864 : :
4865 : : /*
4866 : : * GUC assign_hook for recovery_target_name
4867 : : */
4868 : : void
4869 : 937 : assign_recovery_target_name(const char *newval, void *extra)
4870 : : {
4871 [ - + ]: 937 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
579 tgl@sss.pgh.pa.us 4872 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_NAME)
4873 : 0 : error_multiple_recovery_targets();
4874 : :
579 tgl@sss.pgh.pa.us 4875 [ + - + + ]:CBC 937 : if (newval && strcmp(newval, "") != 0)
4876 : : {
4877 : 6 : recoveryTarget = RECOVERY_TARGET_NAME;
4878 : 6 : recoveryTargetName = newval;
4879 : : }
4880 : : else
4881 : 931 : recoveryTarget = RECOVERY_TARGET_UNSET;
4882 : 937 : }
4883 : :
4884 : : /*
4885 : : * GUC check_hook for recovery_target_time
4886 : : *
4887 : : * The interpretation of the recovery_target_time string can depend on the
4888 : : * time zone setting, so we need to wait until after all GUC processing is
4889 : : * done before we can do the final parsing of the string. This check function
4890 : : * only does a parsing pass to catch syntax errors, but we store the string
4891 : : * and parse it again when we need to use it.
4892 : : */
4893 : : bool
4894 : 933 : check_recovery_target_time(char **newval, void **extra, GucSource source)
4895 : : {
4896 [ + + ]: 933 : if (strcmp(*newval, "") != 0)
4897 : : {
4898 : : /* reject some special values */
4899 [ + - ]: 3 : if (strcmp(*newval, "now") == 0 ||
4900 [ + - ]: 3 : strcmp(*newval, "today") == 0 ||
4901 [ + - ]: 3 : strcmp(*newval, "tomorrow") == 0 ||
4902 [ - + ]: 3 : strcmp(*newval, "yesterday") == 0)
4903 : : {
579 tgl@sss.pgh.pa.us 4904 :UBC 0 : return false;
4905 : : }
4906 : :
4907 : : /*
4908 : : * parse timestamp value (see also timestamptz_in())
4909 : : */
4910 : : {
579 tgl@sss.pgh.pa.us 4911 :CBC 3 : char *str = *newval;
4912 : : fsec_t fsec;
4913 : : struct pg_tm tt,
4914 : 3 : *tm = &tt;
4915 : : int tz;
4916 : : int dtype;
4917 : : int nf;
4918 : : int dterr;
4919 : : char *field[MAXDATEFIELDS];
4920 : : int ftype[MAXDATEFIELDS];
4921 : : char workbuf[MAXDATELEN + MAXDATEFIELDS];
4922 : : DateTimeErrorExtra dtextra;
4923 : : TimestampTz timestamp;
4924 : :
4925 : 3 : dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
4926 : : field, ftype, MAXDATEFIELDS, &nf);
4927 [ + - ]: 3 : if (dterr == 0)
492 4928 : 3 : dterr = DecodeDateTime(field, ftype, nf,
4929 : : &dtype, tm, &fsec, &tz, &dtextra);
579 4930 [ - + ]: 3 : if (dterr != 0)
579 tgl@sss.pgh.pa.us 4931 :UBC 0 : return false;
579 tgl@sss.pgh.pa.us 4932 [ - + ]:CBC 3 : if (dtype != DTK_DATE)
579 tgl@sss.pgh.pa.us 4933 :UBC 0 : return false;
4934 : :
579 tgl@sss.pgh.pa.us 4935 [ - + ]:CBC 3 : if (tm2timestamp(tm, fsec, &tz, ×tamp) != 0)
4936 : : {
579 tgl@sss.pgh.pa.us 4937 :UBC 0 : GUC_check_errdetail("timestamp out of range: \"%s\"", str);
4938 : 0 : return false;
4939 : : }
4940 : : }
4941 : : }
579 tgl@sss.pgh.pa.us 4942 :CBC 933 : return true;
4943 : : }
4944 : :
4945 : : /*
4946 : : * GUC assign_hook for recovery_target_time
4947 : : */
4948 : : void
4949 : 933 : assign_recovery_target_time(const char *newval, void *extra)
4950 : : {
4951 [ + + ]: 933 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
4952 [ + - ]: 1 : recoveryTarget != RECOVERY_TARGET_TIME)
4953 : 1 : error_multiple_recovery_targets();
4954 : :
4955 [ + - + + ]: 932 : if (newval && strcmp(newval, "") != 0)
4956 : 2 : recoveryTarget = RECOVERY_TARGET_TIME;
4957 : : else
4958 : 930 : recoveryTarget = RECOVERY_TARGET_UNSET;
4959 : 932 : }
4960 : :
4961 : : /*
4962 : : * GUC check_hook for recovery_target_timeline
4963 : : */
4964 : : bool
4965 : 931 : check_recovery_target_timeline(char **newval, void **extra, GucSource source)
4966 : : {
4967 : : RecoveryTargetTimeLineGoal rttg;
4968 : : RecoveryTargetTimeLineGoal *myextra;
4969 : :
4970 [ - + ]: 931 : if (strcmp(*newval, "current") == 0)
579 tgl@sss.pgh.pa.us 4971 :UBC 0 : rttg = RECOVERY_TARGET_TIMELINE_CONTROLFILE;
579 tgl@sss.pgh.pa.us 4972 [ + - ]:CBC 931 : else if (strcmp(*newval, "latest") == 0)
4973 : 931 : rttg = RECOVERY_TARGET_TIMELINE_LATEST;
4974 : : else
4975 : : {
579 tgl@sss.pgh.pa.us 4976 :UBC 0 : rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
4977 : :
4978 : 0 : errno = 0;
4979 : 0 : strtoul(*newval, NULL, 0);
4980 [ # # # # ]: 0 : if (errno == EINVAL || errno == ERANGE)
4981 : : {
4982 : 0 : GUC_check_errdetail("recovery_target_timeline is not a valid number.");
4983 : 0 : return false;
4984 : : }
4985 : : }
4986 : :
579 tgl@sss.pgh.pa.us 4987 :CBC 931 : myextra = (RecoveryTargetTimeLineGoal *) guc_malloc(ERROR, sizeof(RecoveryTargetTimeLineGoal));
4988 : 931 : *myextra = rttg;
4989 : 931 : *extra = (void *) myextra;
4990 : :
4991 : 931 : return true;
4992 : : }
4993 : :
4994 : : /*
4995 : : * GUC assign_hook for recovery_target_timeline
4996 : : */
4997 : : void
4998 : 931 : assign_recovery_target_timeline(const char *newval, void *extra)
4999 : : {
5000 : 931 : recoveryTargetTimeLineGoal = *((RecoveryTargetTimeLineGoal *) extra);
5001 [ - + ]: 931 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_NUMERIC)
579 tgl@sss.pgh.pa.us 5002 :UBC 0 : recoveryTargetTLIRequested = (TimeLineID) strtoul(newval, NULL, 0);
5003 : : else
579 tgl@sss.pgh.pa.us 5004 :CBC 931 : recoveryTargetTLIRequested = 0;
5005 : 931 : }
5006 : :
5007 : : /*
5008 : : * GUC check_hook for recovery_target_xid
5009 : : */
5010 : : bool
5011 : 931 : check_recovery_target_xid(char **newval, void **extra, GucSource source)
5012 : : {
5013 [ + + ]: 931 : if (strcmp(*newval, "") != 0)
5014 : : {
5015 : : TransactionId xid;
5016 : : TransactionId *myextra;
5017 : :
5018 : 1 : errno = 0;
5019 : 1 : xid = (TransactionId) strtou64(*newval, NULL, 0);
5020 [ + - - + ]: 1 : if (errno == EINVAL || errno == ERANGE)
579 tgl@sss.pgh.pa.us 5021 :UBC 0 : return false;
5022 : :
579 tgl@sss.pgh.pa.us 5023 :CBC 1 : myextra = (TransactionId *) guc_malloc(ERROR, sizeof(TransactionId));
5024 : 1 : *myextra = xid;
5025 : 1 : *extra = (void *) myextra;
5026 : : }
5027 : 931 : return true;
5028 : : }
5029 : :
5030 : : /*
5031 : : * GUC assign_hook for recovery_target_xid
5032 : : */
5033 : : void
5034 : 931 : assign_recovery_target_xid(const char *newval, void *extra)
5035 : : {
5036 [ - + ]: 931 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
579 tgl@sss.pgh.pa.us 5037 [ # # ]:UBC 0 : recoveryTarget != RECOVERY_TARGET_XID)
5038 : 0 : error_multiple_recovery_targets();
5039 : :
579 tgl@sss.pgh.pa.us 5040 [ + - + + ]:CBC 931 : if (newval && strcmp(newval, "") != 0)
5041 : : {
5042 : 1 : recoveryTarget = RECOVERY_TARGET_XID;
5043 : 1 : recoveryTargetXid = *((TransactionId *) extra);
5044 : : }
5045 : : else
5046 : 930 : recoveryTarget = RECOVERY_TARGET_UNSET;
5047 : 931 : }
|