Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xlogrecovery.c
4 : * Functions for WAL recovery, standby mode
5 : *
6 : * This source file contains functions controlling WAL recovery.
7 : * InitWalRecovery() initializes the system for crash or archive recovery,
8 : * or standby mode, depending on configuration options and the state of
9 : * the control file and possible backup label file. PerformWalRecovery()
10 : * performs the actual WAL replay, calling the rmgr-specific redo routines.
11 : * EndWalRecovery() performs end-of-recovery checks and cleanup actions,
12 : * and prepares information needed to initialize the WAL for writes. In
13 : * addition to these three main functions, there are a bunch of functions
14 : * for interrogating recovery state and controlling the recovery process.
15 : *
16 : *
17 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
18 : * Portions Copyright (c) 1994, Regents of the University of California
19 : *
20 : * src/backend/access/transam/xlogrecovery.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 :
25 : #include "postgres.h"
26 :
27 : #include <ctype.h>
28 : #include <math.h>
29 : #include <time.h>
30 : #include <sys/stat.h>
31 : #include <sys/time.h>
32 : #include <unistd.h>
33 :
34 : #include "access/timeline.h"
35 : #include "access/transam.h"
36 : #include "access/xact.h"
37 : #include "access/xlog_internal.h"
38 : #include "access/xlogarchive.h"
39 : #include "access/xlogprefetcher.h"
40 : #include "access/xlogreader.h"
41 : #include "access/xlogrecovery.h"
42 : #include "access/xlogutils.h"
43 : #include "backup/basebackup.h"
44 : #include "catalog/pg_control.h"
45 : #include "commands/tablespace.h"
46 : #include "common/file_utils.h"
47 : #include "miscadmin.h"
48 : #include "pgstat.h"
49 : #include "postmaster/bgwriter.h"
50 : #include "postmaster/startup.h"
51 : #include "replication/slot.h"
52 : #include "replication/walreceiver.h"
53 : #include "storage/fd.h"
54 : #include "storage/ipc.h"
55 : #include "storage/latch.h"
56 : #include "storage/pmsignal.h"
57 : #include "storage/proc.h"
58 : #include "storage/procarray.h"
59 : #include "storage/spin.h"
60 : #include "utils/builtins.h"
61 : #include "utils/datetime.h"
62 : #include "utils/guc_hooks.h"
63 : #include "utils/pg_lsn.h"
64 : #include "utils/ps_status.h"
65 : #include "utils/pg_rusage.h"
66 :
67 : /* Unsupported old recovery command file names (relative to $PGDATA) */
68 : #define RECOVERY_COMMAND_FILE "recovery.conf"
69 : #define RECOVERY_COMMAND_DONE "recovery.done"
70 :
71 : /*
72 : * GUC support
73 : */
74 : const struct config_enum_entry recovery_target_action_options[] = {
75 : {"pause", RECOVERY_TARGET_ACTION_PAUSE, false},
76 : {"promote", RECOVERY_TARGET_ACTION_PROMOTE, false},
77 : {"shutdown", RECOVERY_TARGET_ACTION_SHUTDOWN, false},
78 : {NULL, 0, false}
79 : };
80 :
81 : /* options formerly taken from recovery.conf for archive recovery */
82 : char *recoveryRestoreCommand = NULL;
83 : char *recoveryEndCommand = NULL;
84 : char *archiveCleanupCommand = NULL;
85 : RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
86 : bool recoveryTargetInclusive = true;
87 : int recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE;
88 : TransactionId recoveryTargetXid;
89 : char *recovery_target_time_string;
90 : TimestampTz recoveryTargetTime;
91 : const char *recoveryTargetName;
92 : XLogRecPtr recoveryTargetLSN;
93 : int recovery_min_apply_delay = 0;
94 :
95 : /* options formerly taken from recovery.conf for XLOG streaming */
96 : char *PrimaryConnInfo = NULL;
97 : char *PrimarySlotName = NULL;
98 : bool wal_receiver_create_temp_slot = false;
99 :
100 : /*
101 : * recoveryTargetTimeLineGoal: what the user requested, if any
102 : *
103 : * recoveryTargetTLIRequested: numeric value of requested timeline, if constant
104 : *
105 : * recoveryTargetTLI: the currently understood target timeline; this can change during recovery
106 : *
107 : * expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and
108 : * the timelines of its known parents, newest first (so recoveryTargetTLI is
109 : * always the first list member). Only these TLIs are expected to be seen in
110 : * the WAL segments we read, and indeed only these TLIs will be considered as
111 : * candidate WAL files to open at all.
112 : *
113 : * curFileTLI: the TLI appearing in the name of the current input WAL file.
114 : * (This is not necessarily the same as the timeline from which we are
115 : * replaying WAL, which StartupXLOG calls replayTLI, because we could be
116 : * scanning data that was copied from an ancestor timeline when the current
117 : * file was created.) During a sequential scan we do not allow this value
118 : * to decrease.
119 : */
120 : RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal = RECOVERY_TARGET_TIMELINE_LATEST;
121 : TimeLineID recoveryTargetTLIRequested = 0;
122 : TimeLineID recoveryTargetTLI = 0;
123 : static List *expectedTLEs;
124 : static TimeLineID curFileTLI;
125 :
126 : /*
127 : * When ArchiveRecoveryRequested is set, archive recovery was requested,
128 : * ie. signal files were present. When InArchiveRecovery is set, we are
129 : * currently recovering using offline XLOG archives. These variables are only
130 : * valid in the startup process.
131 : *
132 : * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
133 : * currently performing crash recovery using only XLOG files in pg_wal, but
134 : * will switch to using offline XLOG archives as soon as we reach the end of
135 : * WAL in pg_wal.
136 : */
137 : bool ArchiveRecoveryRequested = false;
138 : bool InArchiveRecovery = false;
139 :
140 : /*
141 : * When StandbyModeRequested is set, standby mode was requested, i.e.
142 : * standby.signal file was present. When StandbyMode is set, we are currently
143 : * in standby mode. These variables are only valid in the startup process.
144 : * They work similarly to ArchiveRecoveryRequested and InArchiveRecovery.
145 : */
146 : static bool StandbyModeRequested = false;
147 : bool StandbyMode = false;
148 :
149 : /* was a signal file present at startup? */
150 : static bool standby_signal_file_found = false;
151 : static bool recovery_signal_file_found = false;
152 :
153 : /*
154 : * CheckPointLoc is the position of the checkpoint record that determines
155 : * where to start the replay. It comes from the backup label file or the
156 : * control file.
157 : *
158 : * RedoStartLSN is the checkpoint's REDO location, also from the backup label
159 : * file or the control file. In standby mode, XLOG streaming usually starts
160 : * from the position where an invalid record was found. But if we fail to
161 : * read even the initial checkpoint record, we use the REDO location instead
162 : * of the checkpoint location as the start position of XLOG streaming.
163 : * Otherwise we would have to jump backwards to the REDO location after
164 : * reading the checkpoint record, because the REDO record can precede the
165 : * checkpoint record.
166 : */
167 : static XLogRecPtr CheckPointLoc = InvalidXLogRecPtr;
168 : static TimeLineID CheckPointTLI = 0;
169 : static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
170 : static TimeLineID RedoStartTLI = 0;
171 :
172 : /*
173 : * Local copy of SharedHotStandbyActive variable. False actually means "not
174 : * known, need to check the shared state".
175 : */
176 : static bool LocalHotStandbyActive = false;
177 :
178 : /*
179 : * Local copy of SharedPromoteIsTriggered variable. False actually means "not
180 : * known, need to check the shared state".
181 : */
182 : static bool LocalPromoteIsTriggered = false;
183 :
184 : /* Has the recovery code requested a walreceiver wakeup? */
185 : static bool doRequestWalReceiverReply;
186 :
187 : /* XLogReader object used to parse the WAL records */
188 : static XLogReaderState *xlogreader = NULL;
189 :
190 : /* XLogPrefetcher object used to consume WAL records with read-ahead */
191 : static XLogPrefetcher *xlogprefetcher = NULL;
192 :
193 : /* Parameters passed down from ReadRecord to the XLogPageRead callback. */
194 : typedef struct XLogPageReadPrivate
195 : {
196 : int emode; /* NOTE(review): presumably the error level given to ReadRecord -- confirm */
197 : bool fetching_ckpt; /* are we fetching a checkpoint record? */
198 : bool randAccess; /* NOTE(review): appears to be forwarded to WaitForWALToBecomeAvailable -- confirm */
199 : TimeLineID replayTLI; /* NOTE(review): presumably the replayTLI given to ReadRecord -- confirm */
200 : } XLogPageReadPrivate;
201 :
202 : /* flag to tell XLogPageRead that we have started replaying */
203 : static bool InRedo = false;
204 :
205 : /*
206 : * Codes indicating where we got a WAL file from during recovery, or where
207 : * to attempt to get one.
208 : */
209 : typedef enum
210 : {
211 : XLOG_FROM_ANY = 0, /* request to read WAL from any source */
212 : XLOG_FROM_ARCHIVE, /* restored using restore_command */
213 : XLOG_FROM_PG_WAL, /* existing file in pg_wal */
214 : XLOG_FROM_STREAM /* streamed from primary */
215 : } XLogSource;
216 :
217 : /* human-readable names for XLogSources, for debugging output */
218 : static const char *const xlogSourceNames[] = {"any", "archive", "pg_wal", "stream"};
219 :
220 : /*
221 : * readFile is -1 or a kernel FD for the log file segment that's currently
222 : * open for reading. readSegNo identifies the segment. readOff is the offset
223 : * of the page just read, readLen indicates how much of it has been read into
224 : * readBuf, and readSource indicates where we got the currently open file from.
225 : *
226 : * Note: we could use Reserve/ReleaseExternalFD to track consumption of this
227 : * FD too (like for openLogFile in xlog.c); but it doesn't currently seem
228 : * worthwhile, since the XLOG is not read by general-purpose sessions.
229 : */
230 : static int readFile = -1;
231 : static XLogSegNo readSegNo = 0;
232 : static uint32 readOff = 0;
233 : static uint32 readLen = 0;
234 : static XLogSource readSource = XLOG_FROM_ANY;
235 :
236 : /*
237 : * Keeps track of which source we're currently reading from. This is
238 : * different from readSource in that this is always set, even when we don't
239 : * currently have a WAL file open. If lastSourceFailed is set, our last
240 : * attempt to read from currentSource failed, and we should try another source
241 : * next.
242 : *
243 : * pendingWalRcvRestart is set when a config change occurs that requires a
244 : * walreceiver restart. This is only valid in XLOG_FROM_STREAM state.
245 : */
246 : static XLogSource currentSource = XLOG_FROM_ANY;
247 : static bool lastSourceFailed = false;
248 : static bool pendingWalRcvRestart = false;
249 :
250 : /*
251 : * These variables track when we last obtained some WAL data to process,
252 : * and where we got it from. (XLogReceiptSource is initially the same as
253 : * readSource, but readSource gets reset to zero when we don't have data
254 : * to process right now. It is also different from currentSource, which
255 : * also changes when we try to read from a source and fail, while
256 : * XLogReceiptSource tracks where we last successfully read some WAL.)
257 : */
258 : static TimestampTz XLogReceiptTime = 0;
259 : static XLogSource XLogReceiptSource = XLOG_FROM_ANY;
260 :
261 : /* Local copy of WalRcv->flushedUpto */
262 : static XLogRecPtr flushedUpto = 0;
263 : static TimeLineID receiveTLI = 0;
264 :
265 : /*
266 : * Copy of minRecoveryPoint and backupEndPoint from the control file.
267 : *
268 : * In order to reach consistency, we must replay the WAL up to
269 : * minRecoveryPoint. If backupEndRequired is true, we must also reach
270 : * backupEndPoint, or if it's invalid, an end-of-backup record corresponding
271 : * to backupStartPoint.
272 : *
273 : * Note: In archive recovery, after consistency has been reached, the
274 : * functions in xlog.c will start updating minRecoveryPoint in the control
275 : * file. But this copy of minRecoveryPoint variable reflects the value at the
276 : * beginning of recovery, and is *not* updated after consistency is reached.
277 : */
278 : static XLogRecPtr minRecoveryPoint;
279 : static TimeLineID minRecoveryPointTLI;
280 :
281 : static XLogRecPtr backupStartPoint;
282 : static XLogRecPtr backupEndPoint;
283 : static bool backupEndRequired = false;
284 :
285 : /*
286 : * Have we reached a consistent database state? In crash recovery, we have
287 : * to replay all the WAL, so reachedConsistency is never set. During archive
288 : * recovery, the database is consistent once minRecoveryPoint is reached.
289 : *
290 : * Consistent state means that the system is internally consistent, all
291 : * the WAL has been replayed up to a certain point, and importantly, there
292 : * is no trace of later actions on disk.
293 : */
294 : bool reachedConsistency = false;
295 :
296 : /* Buffers dedicated to consistency checks of size BLCKSZ */
297 : static char *replay_image_masked = NULL;
298 : static char *primary_image_masked = NULL;
299 :
300 :
301 : /*
302 : * Shared-memory state for WAL recovery.
303 : */
304 : typedef struct XLogRecoveryCtlData
305 : {
306 : /*
307 : * SharedHotStandbyActive indicates if we allow hot standby queries to be
308 : * run. Protected by info_lck.
309 : */
310 : bool SharedHotStandbyActive;
311 :
312 : /*
313 : * SharedPromoteIsTriggered indicates if a standby promotion has been
314 : * triggered. Protected by info_lck.
315 : */
316 : bool SharedPromoteIsTriggered;
317 :
318 : /*
319 : * recoveryWakeupLatch is used to wake up the startup process to continue
320 : * WAL replay, if it is waiting for WAL to arrive or promotion to be
321 : * requested.
322 : *
323 : * Note that the startup process also uses another latch, its procLatch,
324 : * to wait for recovery conflict. We could get rid of recoveryWakeupLatch
325 : * for signaling the startup process in favor of using its procLatch, which
326 : * would comport better with possible generic signal handlers using that
327 : * latch. But we should not do that, because the startup process doesn't
328 : * assume that it's woken up by the walreceiver process or SIGHUP signal
329 : * handler while it's waiting for recovery conflict. The separate latches,
330 : * recoveryWakeupLatch and procLatch, should be used for inter-process
331 : * communication for WAL replay and recovery conflict, respectively.
332 : */
333 : Latch recoveryWakeupLatch;
334 :
335 : /*
336 : * Last record successfully replayed.
337 : */
338 : XLogRecPtr lastReplayedReadRecPtr; /* start position */
339 : XLogRecPtr lastReplayedEndRecPtr; /* end+1 position */
340 : TimeLineID lastReplayedTLI; /* timeline */
341 :
342 : /*
343 : * When we're currently replaying a record, ie. in a redo function,
344 : * replayEndRecPtr points to the end+1 of the record being replayed,
345 : * otherwise it's equal to lastReplayedEndRecPtr.
346 : */
347 : XLogRecPtr replayEndRecPtr;
348 : TimeLineID replayEndTLI;
349 : /* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
350 : TimestampTz recoveryLastXTime;
351 :
352 : /*
353 : * timestamp of when we started replaying the current chunk of WAL data,
354 : * only relevant for replication or archive recovery
355 : */
356 : TimestampTz currentChunkStartTime;
357 : /* Recovery pause state */
358 : RecoveryPauseState recoveryPauseState;
359 : ConditionVariable recoveryNotPausedCV;
360 :
361 : slock_t info_lck; /* locks shared variables shown above */
362 : } XLogRecoveryCtlData;
363 :
364 : static XLogRecoveryCtlData *XLogRecoveryCtl = NULL;
365 :
366 : /*
367 : * abortedRecPtr is the start pointer of a broken record at end of WAL when
368 : * recovery completes; missingContrecPtr is the location of the first
369 : * contrecord that went missing. See CreateOverwriteContrecordRecord for
370 : * details.
371 : */
372 : static XLogRecPtr abortedRecPtr;
373 : static XLogRecPtr missingContrecPtr;
374 :
375 : /*
376 : * if recoveryStopsBefore/After returns true, it saves information of the stop
377 : * point here
378 : */
379 : static TransactionId recoveryStopXid;
380 : static TimestampTz recoveryStopTime;
381 : static XLogRecPtr recoveryStopLSN;
382 : static char recoveryStopName[MAXFNAMELEN];
383 : static bool recoveryStopAfter;
384 :
385 : /* prototypes for local functions */
386 : static void ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI);
387 :
388 : static void EnableStandbyMode(void);
389 : static void readRecoverySignalFile(void);
390 : static void validateRecoveryParameters(void);
391 : static bool read_backup_label(XLogRecPtr *checkPointLoc,
392 : TimeLineID *backupLabelTLI,
393 : bool *backupEndRequired, bool *backupFromStandby);
394 : static bool read_tablespace_map(List **tablespaces);
395 :
396 : static void xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI);
397 : static void CheckRecoveryConsistency(void);
398 : static void rm_redo_error_callback(void *arg);
399 : #ifdef WAL_DEBUG
400 : static void xlog_outrec(StringInfo buf, XLogReaderState *record);
401 : #endif
402 : static void xlog_block_info(StringInfo buf, XLogReaderState *record);
403 : static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
404 : TimeLineID prevTLI, TimeLineID replayTLI);
405 : static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime);
406 : static void verifyBackupPageConsistency(XLogReaderState *record);
407 :
408 : static bool recoveryStopsBefore(XLogReaderState *record);
409 : static bool recoveryStopsAfter(XLogReaderState *record);
410 : static char *getRecoveryStopReason(void);
411 : static void recoveryPausesHere(bool endOfRecovery);
412 : static bool recoveryApplyDelay(XLogReaderState *record);
413 : static void ConfirmRecoveryPaused(void);
414 :
415 : static XLogRecord *ReadRecord(XLogPrefetcher *xlogprefetcher,
416 : int emode, bool fetching_ckpt,
417 : TimeLineID replayTLI);
418 :
419 : static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
420 : int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
421 : static XLogPageReadResult WaitForWALToBecomeAvailable(XLogRecPtr RecPtr,
422 : bool randAccess,
423 : bool fetching_ckpt,
424 : XLogRecPtr tliRecPtr,
425 : TimeLineID replayTLI,
426 : XLogRecPtr replayLSN,
427 : bool nonblocking);
428 : static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
429 : static XLogRecord *ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
430 : XLogRecPtr RecPtr, TimeLineID replayTLI);
431 : static bool rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN);
432 : static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
433 : XLogSource source, bool notfoundOk);
434 : static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source);
435 :
436 : static bool CheckForStandbyTrigger(void);
437 : static void SetPromoteIsTriggered(void);
438 : static bool HotStandbyActiveInReplay(void);
439 :
440 : static void SetCurrentChunkStartTime(TimestampTz xtime);
441 : static void SetLatestXTime(TimestampTz xtime);
442 :
443 : /*
444 : * Initialization of shared memory for WAL recovery
445 : */
446 : Size
417 heikki.linnakangas 447 GIC 4564 : XLogRecoveryShmemSize(void)
448 : {
417 heikki.linnakangas 449 ECB : Size size;
450 :
451 : /* XLogRecoveryCtl */
417 heikki.linnakangas 452 GIC 4564 : size = sizeof(XLogRecoveryCtlData);
453 :
417 heikki.linnakangas 454 CBC 4564 : return size;
455 : }
417 heikki.linnakangas 456 ECB :
457 : void
417 heikki.linnakangas 458 GIC 1826 : XLogRecoveryShmemInit(void)
459 : {
417 heikki.linnakangas 460 ECB : bool found;
461 :
417 heikki.linnakangas 462 GIC 1826 : XLogRecoveryCtl = (XLogRecoveryCtlData *)
463 1826 : ShmemInitStruct("XLOG Recovery Ctl", XLogRecoveryShmemSize(), &found);
417 heikki.linnakangas 464 CBC 1826 : if (found)
417 heikki.linnakangas 465 LBC 0 : return;
417 heikki.linnakangas 466 CBC 1826 : memset(XLogRecoveryCtl, 0, sizeof(XLogRecoveryCtlData));
417 heikki.linnakangas 467 EUB :
417 heikki.linnakangas 468 CBC 1826 : SpinLockInit(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 469 GIC 1826 : InitSharedLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
417 heikki.linnakangas 470 CBC 1826 : ConditionVariableInit(&XLogRecoveryCtl->recoveryNotPausedCV);
417 heikki.linnakangas 471 ECB : }
472 :
473 : /*
474 : * A thin wrapper to enable StandbyMode and do other preparatory work as
475 : * needed.
476 : */
477 : static void
62 rhaas 478 GIC 68 : EnableStandbyMode(void)
479 : {
62 rhaas 480 CBC 68 : StandbyMode = true;
481 :
62 rhaas 482 ECB : /*
483 : * To avoid server log bloat, we don't report recovery progress in a
484 : * standby as it will always be in recovery unless promoted. We disable
485 : * startup progress timeout in standby mode to avoid calling
486 : * startup_progress_timeout_handler() unnecessarily.
487 : */
62 rhaas 488 GIC 68 : disable_startup_progress_timeout();
489 68 : }
62 rhaas 490 ECB :
417 heikki.linnakangas 491 : /*
492 : * Prepare the system for WAL recovery, if needed.
493 : *
494 : * This is called by StartupXLOG() which coordinates the server startup
495 : * sequence. This function analyzes the control file and the backup label
496 : * file, if any, and figures out whether we need to perform crash recovery or
497 : * archive recovery, and how far we need to replay the WAL to reach a
498 : * consistent state.
499 : *
500 : * This doesn't yet change the on-disk state, except for creating the symlinks
501 : * from table space map file if any, and for fetching WAL files needed to find
502 : * the checkpoint record. On entry, the caller has already read the control
503 : * file into memory, and passes it as argument. This function updates it to
504 : * reflect the recovery state, and the caller is expected to write it back to
505 : * disk after initializing other subsystems, but before calling
506 : * PerformWalRecovery().
507 : *
508 : * This initializes some global variables like ArchiveRecoveryRequested,
509 : * StandbyModeRequested and InRecovery.
510 : */
511 : void
417 heikki.linnakangas 512 GIC 1176 : InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
513 : bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
417 heikki.linnakangas 514 ECB : {
515 : XLogPageReadPrivate *private;
516 : struct stat st;
517 : bool wasShutdown;
518 : XLogRecord *record;
519 : DBState dbstate_at_startup;
417 heikki.linnakangas 520 GIC 1176 : bool haveTblspcMap = false;
521 1176 : bool haveBackupLabel = false;
417 heikki.linnakangas 522 ECB : CheckPoint checkPoint;
417 heikki.linnakangas 523 CBC 1176 : bool backupFromStandby = false;
524 :
525 1176 : dbstate_at_startup = ControlFile->state;
526 :
417 heikki.linnakangas 527 ECB : /*
528 : * Initialize on the assumption we want to recover to the latest timeline
529 : * that's active according to pg_control.
530 : */
417 heikki.linnakangas 531 GIC 1176 : if (ControlFile->minRecoveryPointTLI >
532 1176 : ControlFile->checkPointCopy.ThisTimeLineID)
417 heikki.linnakangas 533 CBC 1 : recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
417 heikki.linnakangas 534 ECB : else
417 heikki.linnakangas 535 CBC 1175 : recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
536 :
417 heikki.linnakangas 537 ECB : /*
538 : * Check for signal files, and if so set up state for offline recovery
539 : */
417 heikki.linnakangas 540 GIC 1176 : readRecoverySignalFile();
541 1176 : validateRecoveryParameters();
417 heikki.linnakangas 542 ECB :
417 heikki.linnakangas 543 CBC 1176 : if (ArchiveRecoveryRequested)
544 : {
545 73 : if (StandbyModeRequested)
417 heikki.linnakangas 546 GIC 68 : ereport(LOG,
417 heikki.linnakangas 547 ECB : (errmsg("entering standby mode")));
417 heikki.linnakangas 548 CBC 5 : else if (recoveryTarget == RECOVERY_TARGET_XID)
417 heikki.linnakangas 549 UIC 0 : ereport(LOG,
417 heikki.linnakangas 550 ECB : (errmsg("starting point-in-time recovery to XID %u",
417 heikki.linnakangas 551 EUB : recoveryTargetXid)));
417 heikki.linnakangas 552 GIC 5 : else if (recoveryTarget == RECOVERY_TARGET_TIME)
417 heikki.linnakangas 553 UIC 0 : ereport(LOG,
417 heikki.linnakangas 554 ECB : (errmsg("starting point-in-time recovery to %s",
417 heikki.linnakangas 555 EUB : timestamptz_to_str(recoveryTargetTime))));
417 heikki.linnakangas 556 GIC 5 : else if (recoveryTarget == RECOVERY_TARGET_NAME)
557 3 : ereport(LOG,
417 heikki.linnakangas 558 ECB : (errmsg("starting point-in-time recovery to \"%s\"",
559 : recoveryTargetName)));
417 heikki.linnakangas 560 GIC 2 : else if (recoveryTarget == RECOVERY_TARGET_LSN)
417 heikki.linnakangas 561 UIC 0 : ereport(LOG,
417 heikki.linnakangas 562 ECB : (errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
417 heikki.linnakangas 563 EUB : LSN_FORMAT_ARGS(recoveryTargetLSN))));
417 heikki.linnakangas 564 GIC 2 : else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
417 heikki.linnakangas 565 UIC 0 : ereport(LOG,
417 heikki.linnakangas 566 ECB : (errmsg("starting point-in-time recovery to earliest consistent point")));
417 heikki.linnakangas 567 EUB : else
417 heikki.linnakangas 568 GIC 2 : ereport(LOG,
569 : (errmsg("starting archive recovery")));
417 heikki.linnakangas 570 ECB : }
571 :
572 : /*
573 : * Take ownership of the wakeup latch if we're going to sleep during
574 : * recovery.
575 : */
417 heikki.linnakangas 576 GIC 1176 : if (ArchiveRecoveryRequested)
577 73 : OwnLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
417 heikki.linnakangas 578 ECB :
417 heikki.linnakangas 579 CBC 1176 : private = palloc0(sizeof(XLogPageReadPrivate));
417 heikki.linnakangas 580 GIC 1176 : xlogreader =
417 heikki.linnakangas 581 CBC 1176 : XLogReaderAllocate(wal_segment_size, NULL,
582 1176 : XL_ROUTINE(.page_read = &XLogPageRead,
417 heikki.linnakangas 583 ECB : .segment_open = NULL,
584 : .segment_close = wal_segment_close),
585 : private);
417 heikki.linnakangas 586 GIC 1176 : if (!xlogreader)
417 heikki.linnakangas 587 UIC 0 : ereport(ERROR,
417 heikki.linnakangas 588 ECB : (errcode(ERRCODE_OUT_OF_MEMORY),
417 heikki.linnakangas 589 EUB : errmsg("out of memory"),
590 : errdetail("Failed while allocating a WAL reading processor.")));
417 heikki.linnakangas 591 GIC 1176 : xlogreader->system_identifier = ControlFile->system_identifier;
592 :
367 tmunro 593 ECB : /*
594 : * Set the WAL decode buffer size. This limits how far ahead we can read
595 : * in the WAL.
596 : */
367 tmunro 597 GIC 1176 : XLogReaderSetDecodeBuffer(xlogreader, NULL, wal_decode_buffer_size);
598 :
367 tmunro 599 ECB : /* Create a WAL prefetcher. */
367 tmunro 600 GIC 1176 : xlogprefetcher = XLogPrefetcherAllocate(xlogreader);
601 :
417 heikki.linnakangas 602 ECB : /*
603 : * Allocate two page buffers dedicated to WAL consistency checks. We do
604 : * it this way, rather than just making static arrays, for two reasons:
605 : * (1) no need to waste the storage in most instantiations of the backend;
606 : * (2) a static char array isn't guaranteed to have any particular
607 : * alignment, whereas palloc() will provide MAXALIGN'd storage.
608 : */
417 heikki.linnakangas 609 GIC 1176 : replay_image_masked = (char *) palloc(BLCKSZ);
610 1176 : primary_image_masked = (char *) palloc(BLCKSZ);
417 heikki.linnakangas 611 ECB :
417 heikki.linnakangas 612 CBC 1176 : if (read_backup_label(&CheckPointLoc, &CheckPointTLI, &backupEndRequired,
613 : &backupFromStandby))
417 heikki.linnakangas 614 ECB : {
417 heikki.linnakangas 615 GIC 51 : List *tablespaces = NIL;
616 :
417 heikki.linnakangas 617 ECB : /*
618 : * Archive recovery was requested, and thanks to the backup label
619 : * file, we know how far we need to replay to reach consistency. Enter
620 : * archive recovery directly.
621 : */
417 heikki.linnakangas 622 GIC 51 : InArchiveRecovery = true;
623 51 : if (StandbyModeRequested)
62 rhaas 624 CBC 44 : EnableStandbyMode();
417 heikki.linnakangas 625 ECB :
626 : /*
627 : * When a backup_label file is present, we want to roll forward from
628 : * the checkpoint it identifies, rather than using pg_control.
629 : */
263 fujii 630 GNC 51 : record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
631 : CheckPointTLI);
417 heikki.linnakangas 632 CBC 51 : if (record != NULL)
633 : {
634 51 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
417 heikki.linnakangas 635 GIC 51 : wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
417 heikki.linnakangas 636 CBC 51 : ereport(DEBUG1,
417 heikki.linnakangas 637 ECB : (errmsg_internal("checkpoint record is at %X/%X",
638 : LSN_FORMAT_ARGS(CheckPointLoc))));
417 heikki.linnakangas 639 GIC 51 : InRecovery = true; /* force recovery even if SHUTDOWNED */
640 :
417 heikki.linnakangas 641 ECB : /*
642 : * Make sure that REDO location exists. This may not be the case
643 : * if there was a crash during an online backup, which left a
644 : * backup_label around that references a WAL segment that's
645 : * already been archived.
646 : */
417 heikki.linnakangas 647 GIC 51 : if (checkPoint.redo < CheckPointLoc)
648 : {
367 tmunro 649 CBC 51 : XLogPrefetcherBeginRead(xlogprefetcher, checkPoint.redo);
367 tmunro 650 GIC 51 : if (!ReadRecord(xlogprefetcher, LOG, false,
417 heikki.linnakangas 651 ECB : checkPoint.ThisTimeLineID))
417 heikki.linnakangas 652 LBC 0 : ereport(FATAL,
653 : (errmsg("could not find redo location referenced by checkpoint record"),
417 heikki.linnakangas 654 EUB : errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
655 : "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
656 : "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
657 : DataDir, DataDir, DataDir)));
658 : }
659 : }
660 : else
661 : {
417 heikki.linnakangas 662 UIC 0 : ereport(FATAL,
663 : (errmsg("could not locate required checkpoint record"),
417 heikki.linnakangas 664 EUB : errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
665 : "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
666 : "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
667 : DataDir, DataDir, DataDir)));
668 : wasShutdown = false; /* keep compiler quiet */
669 : }
670 :
671 : /* Read the tablespace_map file if present and create symlinks. */
417 heikki.linnakangas 672 GIC 51 : if (read_tablespace_map(&tablespaces))
673 : {
417 heikki.linnakangas 674 ECB : ListCell *lc;
675 :
417 heikki.linnakangas 676 GIC 2 : foreach(lc, tablespaces)
677 : {
417 heikki.linnakangas 678 CBC 1 : tablespaceinfo *ti = lfirst(lc);
679 : char *linkloc;
417 heikki.linnakangas 680 ECB :
417 heikki.linnakangas 681 GIC 1 : linkloc = psprintf("pg_tblspc/%s", ti->oid);
682 :
417 heikki.linnakangas 683 ECB : /*
684 : * Remove the existing symlink if any and Create the symlink
685 : * under PGDATA.
686 : */
417 heikki.linnakangas 687 GIC 1 : remove_tablespace_symlink(linkloc);
688 :
417 heikki.linnakangas 689 CBC 1 : if (symlink(ti->path, linkloc) < 0)
417 heikki.linnakangas 690 UIC 0 : ereport(ERROR,
417 heikki.linnakangas 691 ECB : (errcode_for_file_access(),
417 heikki.linnakangas 692 EUB : errmsg("could not create symbolic link \"%s\": %m",
693 : linkloc)));
694 :
417 heikki.linnakangas 695 GIC 1 : pfree(ti->oid);
696 1 : pfree(ti->path);
417 heikki.linnakangas 697 CBC 1 : pfree(ti);
417 heikki.linnakangas 698 ECB : }
699 :
700 : /* tell the caller to delete it later */
417 heikki.linnakangas 701 GIC 1 : haveTblspcMap = true;
702 : }
417 heikki.linnakangas 703 ECB :
704 : /* tell the caller to delete it later */
417 heikki.linnakangas 705 GIC 51 : haveBackupLabel = true;
706 : }
417 heikki.linnakangas 707 ECB : else
708 : {
709 : /*
710 : * If tablespace_map file is present without backup_label file, there
711 : * is no use of such file. There is no harm in retaining it, but it
712 : * is better to get rid of the map file so that we don't have any
713 : * redundant file in data directory and it will avoid any sort of
714 : * confusion. It seems prudent though to just rename the file out of
715 : * the way rather than delete it completely, also we ignore any error
716 : * that occurs in rename operation as even if map file is present
717 : * without backup_label file, it is harmless.
718 : */
417 heikki.linnakangas 719 GIC 1125 : if (stat(TABLESPACE_MAP, &st) == 0)
720 : {
417 heikki.linnakangas 721 CBC 1 : unlink(TABLESPACE_MAP_OLD);
417 heikki.linnakangas 722 GIC 1 : if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
417 heikki.linnakangas 723 CBC 1 : ereport(LOG,
417 heikki.linnakangas 724 ECB : (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
725 : TABLESPACE_MAP, BACKUP_LABEL_FILE),
726 : errdetail("File \"%s\" was renamed to \"%s\".",
727 : TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
728 : else
417 heikki.linnakangas 729 UIC 0 : ereport(LOG,
730 : (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
417 heikki.linnakangas 731 EUB : TABLESPACE_MAP, BACKUP_LABEL_FILE),
732 : errdetail("Could not rename file \"%s\" to \"%s\": %m.",
733 : TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
734 : }
735 :
736 : /*
737 : * It's possible that archive recovery was requested, but we don't
738 : * know how far we need to replay the WAL before we reach consistency.
739 : * This can happen for example if a base backup is taken from a
740 : * running server using an atomic filesystem snapshot, without calling
741 : * pg_backup_start/stop. Or if you just kill a running primary server
742 : * and put it into archive recovery by creating a recovery signal
743 : * file.
744 : *
745 : * Our strategy in that case is to perform crash recovery first,
746 : * replaying all the WAL present in pg_wal, and only enter archive
747 : * recovery after that.
748 : *
749 : * But usually we already know how far we need to replay the WAL (up
750 : * to minRecoveryPoint, up to backupEndPoint, or until we see an
751 : * end-of-backup record), and we can enter archive recovery directly.
752 : */
417 heikki.linnakangas 753 GIC 1125 : if (ArchiveRecoveryRequested &&
754 24 : (ControlFile->minRecoveryPoint != InvalidXLogRecPtr ||
417 heikki.linnakangas 755 CBC 8 : ControlFile->backupEndRequired ||
756 8 : ControlFile->backupEndPoint != InvalidXLogRecPtr ||
757 8 : ControlFile->state == DB_SHUTDOWNED))
417 heikki.linnakangas 758 ECB : {
417 heikki.linnakangas 759 CBC 22 : InArchiveRecovery = true;
417 heikki.linnakangas 760 GIC 22 : if (StandbyModeRequested)
62 rhaas 761 CBC 22 : EnableStandbyMode();
417 heikki.linnakangas 762 ECB : }
763 :
764 : /* Get the last valid checkpoint record. */
417 heikki.linnakangas 765 GIC 1125 : CheckPointLoc = ControlFile->checkPoint;
766 1125 : CheckPointTLI = ControlFile->checkPointCopy.ThisTimeLineID;
417 heikki.linnakangas 767 CBC 1125 : RedoStartLSN = ControlFile->checkPointCopy.redo;
768 1125 : RedoStartTLI = ControlFile->checkPointCopy.ThisTimeLineID;
263 fujii 769 GNC 1125 : record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
417 heikki.linnakangas 770 ECB : CheckPointTLI);
417 heikki.linnakangas 771 CBC 1125 : if (record != NULL)
772 : {
773 1125 : ereport(DEBUG1,
774 : (errmsg_internal("checkpoint record is at %X/%X",
417 heikki.linnakangas 775 ECB : LSN_FORMAT_ARGS(CheckPointLoc))));
776 : }
777 : else
778 : {
779 : /*
780 : * We used to attempt to go back to a secondary checkpoint record
781 : * here, but only when not in standby mode. We now just fail if we
782 : * can't read the last checkpoint because this allows us to
783 : * simplify processing around checkpoints.
784 : */
417 heikki.linnakangas 785 UIC 0 : ereport(PANIC,
786 : (errmsg("could not locate a valid checkpoint record")));
417 heikki.linnakangas 787 EUB : }
417 heikki.linnakangas 788 GIC 1125 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
789 1125 : wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
417 heikki.linnakangas 790 ECB : }
791 :
792 : /*
793 : * If the location of the checkpoint record is not on the expected
794 : * timeline in the history of the requested timeline, we cannot proceed:
795 : * the backup is not part of the history of the requested timeline.
796 : */
417 heikki.linnakangas 797 GIC 1176 : Assert(expectedTLEs); /* was initialized by reading checkpoint
798 : * record */
417 heikki.linnakangas 799 CBC 1176 : if (tliOfPointInHistory(CheckPointLoc, expectedTLEs) !=
800 : CheckPointTLI)
417 heikki.linnakangas 801 ECB : {
802 : XLogRecPtr switchpoint;
803 :
804 : /*
805 : * tliSwitchPoint will throw an error if the checkpoint's timeline is
806 : * not in expectedTLEs at all.
807 : */
417 heikki.linnakangas 808 UIC 0 : switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL);
809 0 : ereport(FATAL,
417 heikki.linnakangas 810 EUB : (errmsg("requested timeline %u is not a child of this server's history",
811 : recoveryTargetTLI),
812 : errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
813 : LSN_FORMAT_ARGS(ControlFile->checkPoint),
814 : ControlFile->checkPointCopy.ThisTimeLineID,
815 : LSN_FORMAT_ARGS(switchpoint))));
816 : }
817 :
818 : /*
819 : * The min recovery point should be part of the requested timeline's
820 : * history, too.
821 : */
417 heikki.linnakangas 822 GIC 1176 : if (!XLogRecPtrIsInvalid(ControlFile->minRecoveryPoint) &&
823 19 : tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
417 heikki.linnakangas 824 CBC 19 : ControlFile->minRecoveryPointTLI)
417 heikki.linnakangas 825 LBC 0 : ereport(FATAL,
417 heikki.linnakangas 826 ECB : (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
417 heikki.linnakangas 827 EUB : recoveryTargetTLI,
828 : LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
829 : ControlFile->minRecoveryPointTLI)));
830 :
417 heikki.linnakangas 831 GIC 1176 : ereport(DEBUG1,
832 : (errmsg_internal("redo record is at %X/%X; shutdown %s",
417 heikki.linnakangas 833 ECB : LSN_FORMAT_ARGS(checkPoint.redo),
834 : wasShutdown ? "true" : "false")));
417 heikki.linnakangas 835 GIC 1176 : ereport(DEBUG1,
836 : (errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
417 heikki.linnakangas 837 ECB : U64FromFullTransactionId(checkPoint.nextXid),
838 : checkPoint.nextOid)));
417 heikki.linnakangas 839 GIC 1176 : ereport(DEBUG1,
840 : (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
417 heikki.linnakangas 841 ECB : checkPoint.nextMulti, checkPoint.nextMultiOffset)));
417 heikki.linnakangas 842 GIC 1176 : ereport(DEBUG1,
843 : (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
417 heikki.linnakangas 844 ECB : checkPoint.oldestXid, checkPoint.oldestXidDB)));
417 heikki.linnakangas 845 GIC 1176 : ereport(DEBUG1,
846 : (errmsg_internal("oldest MultiXactId: %u, in database %u",
417 heikki.linnakangas 847 ECB : checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
417 heikki.linnakangas 848 GIC 1176 : ereport(DEBUG1,
849 : (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u",
417 heikki.linnakangas 850 ECB : checkPoint.oldestCommitTsXid,
851 : checkPoint.newestCommitTsXid)));
417 heikki.linnakangas 852 GIC 1176 : if (!TransactionIdIsNormal(XidFromFullTransactionId(checkPoint.nextXid)))
417 heikki.linnakangas 853 UIC 0 : ereport(PANIC,
417 heikki.linnakangas 854 ECB : (errmsg("invalid next transaction ID")));
417 heikki.linnakangas 855 EUB :
856 : /* sanity check */
417 heikki.linnakangas 857 GIC 1176 : if (checkPoint.redo > CheckPointLoc)
417 heikki.linnakangas 858 UIC 0 : ereport(PANIC,
417 heikki.linnakangas 859 ECB : (errmsg("invalid redo in checkpoint record")));
417 heikki.linnakangas 860 EUB :
861 : /*
862 : * Check whether we need to force recovery from WAL. If it appears to
863 : * have been a clean shutdown and we did not have a recovery signal file,
864 : * then assume no recovery needed.
865 : */
417 heikki.linnakangas 866 GIC 1176 : if (checkPoint.redo < CheckPointLoc)
867 : {
417 heikki.linnakangas 868 CBC 79 : if (wasShutdown)
417 heikki.linnakangas 869 UIC 0 : ereport(PANIC,
417 heikki.linnakangas 870 ECB : (errmsg("invalid redo record in shutdown checkpoint")));
417 heikki.linnakangas 871 GBC 79 : InRecovery = true;
872 : }
417 heikki.linnakangas 873 CBC 1097 : else if (ControlFile->state != DB_SHUTDOWNED)
417 heikki.linnakangas 874 GIC 66 : InRecovery = true;
417 heikki.linnakangas 875 CBC 1031 : else if (ArchiveRecoveryRequested)
417 heikki.linnakangas 876 ECB : {
877 : /* force recovery due to presence of recovery signal file */
417 heikki.linnakangas 878 GIC 6 : InRecovery = true;
879 : }
417 heikki.linnakangas 880 ECB :
881 : /*
882 : * If recovery is needed, update our in-memory copy of pg_control to show
883 : * that we are recovering and to show the selected checkpoint as the place
884 : * we are starting from. We also mark pg_control with any minimum recovery
885 : * stop point obtained from a backup history file.
886 : *
887 : * We don't write the changes to disk yet, though. Only do that after
888 : * initializing various subsystems.
889 : */
417 heikki.linnakangas 890 GIC 1176 : if (InRecovery)
891 : {
417 heikki.linnakangas 892 CBC 151 : if (InArchiveRecovery)
893 : {
894 73 : ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
895 : }
417 heikki.linnakangas 896 ECB : else
897 : {
417 heikki.linnakangas 898 GIC 78 : ereport(LOG,
899 : (errmsg("database system was not properly shut down; "
417 heikki.linnakangas 900 ECB : "automatic recovery in progress")));
417 heikki.linnakangas 901 GIC 78 : if (recoveryTargetTLI > ControlFile->checkPointCopy.ThisTimeLineID)
902 1 : ereport(LOG,
417 heikki.linnakangas 903 ECB : (errmsg("crash recovery starts in timeline %u "
904 : "and has target timeline %u",
905 : ControlFile->checkPointCopy.ThisTimeLineID,
906 : recoveryTargetTLI)));
417 heikki.linnakangas 907 GIC 78 : ControlFile->state = DB_IN_CRASH_RECOVERY;
908 : }
417 heikki.linnakangas 909 CBC 151 : ControlFile->checkPoint = CheckPointLoc;
417 heikki.linnakangas 910 GIC 151 : ControlFile->checkPointCopy = checkPoint;
417 heikki.linnakangas 911 CBC 151 : if (InArchiveRecovery)
417 heikki.linnakangas 912 ECB : {
913 : /* initialize minRecoveryPoint if not set yet */
417 heikki.linnakangas 914 GIC 73 : if (ControlFile->minRecoveryPoint < checkPoint.redo)
915 : {
417 heikki.linnakangas 916 CBC 55 : ControlFile->minRecoveryPoint = checkPoint.redo;
417 heikki.linnakangas 917 GIC 55 : ControlFile->minRecoveryPointTLI = checkPoint.ThisTimeLineID;
417 heikki.linnakangas 918 ECB : }
919 : }
920 :
921 : /*
922 : * Set backupStartPoint if we're starting recovery from a base backup.
923 : *
924 : * Also set backupEndPoint and use minRecoveryPoint as the backup end
925 : * location if we're starting recovery from a base backup which was
926 : * taken from a standby. In this case, the database system status in
927 : * pg_control must indicate that the database was already in recovery.
928 : * Usually that will be DB_IN_ARCHIVE_RECOVERY but also can be
929 : * DB_SHUTDOWNED_IN_RECOVERY if recovery previously was interrupted
930 : * before reaching this point; e.g. because restore_command or
931 : * primary_conninfo were faulty.
932 : *
933 : * Any other state indicates that the backup somehow became corrupted
934 : * and we can't sensibly continue with recovery.
935 : */
417 heikki.linnakangas 936 GIC 151 : if (haveBackupLabel)
937 : {
417 heikki.linnakangas 938 CBC 51 : ControlFile->backupStartPoint = checkPoint.redo;
417 heikki.linnakangas 939 GIC 51 : ControlFile->backupEndRequired = backupEndRequired;
417 heikki.linnakangas 940 ECB :
417 heikki.linnakangas 941 CBC 51 : if (backupFromStandby)
942 : {
943 2 : if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY &&
944 : dbstate_at_startup != DB_SHUTDOWNED_IN_RECOVERY)
417 heikki.linnakangas 945 LBC 0 : ereport(FATAL,
946 : (errmsg("backup_label contains data inconsistent with control file"),
417 heikki.linnakangas 947 EUB : errhint("This means that the backup is corrupted and you will "
948 : "have to use another backup for recovery.")));
417 heikki.linnakangas 949 GIC 2 : ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
950 : }
417 heikki.linnakangas 951 ECB : }
952 : }
953 :
954 : /* remember these, so that we know when we have reached consistency */
417 heikki.linnakangas 955 GIC 1176 : backupStartPoint = ControlFile->backupStartPoint;
956 1176 : backupEndRequired = ControlFile->backupEndRequired;
417 heikki.linnakangas 957 CBC 1176 : backupEndPoint = ControlFile->backupEndPoint;
958 1176 : if (InArchiveRecovery)
417 heikki.linnakangas 959 ECB : {
417 heikki.linnakangas 960 CBC 73 : minRecoveryPoint = ControlFile->minRecoveryPoint;
417 heikki.linnakangas 961 GIC 73 : minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
417 heikki.linnakangas 962 ECB : }
963 : else
964 : {
417 heikki.linnakangas 965 GIC 1103 : minRecoveryPoint = InvalidXLogRecPtr;
966 1103 : minRecoveryPointTLI = 0;
417 heikki.linnakangas 967 ECB : }
968 :
969 : /*
970 : * Start recovery assuming that the final record isn't lost.
971 : */
417 heikki.linnakangas 972 GIC 1176 : abortedRecPtr = InvalidXLogRecPtr;
973 1176 : missingContrecPtr = InvalidXLogRecPtr;
417 heikki.linnakangas 974 ECB :
417 heikki.linnakangas 975 CBC 1176 : *wasShutdown_ptr = wasShutdown;
417 heikki.linnakangas 976 GIC 1176 : *haveBackupLabel_ptr = haveBackupLabel;
417 heikki.linnakangas 977 CBC 1176 : *haveTblspcMap_ptr = haveTblspcMap;
978 1176 : }
417 heikki.linnakangas 979 ECB :
980 : /*
981 : * See if there are any recovery signal files and if so, set state for
982 : * recovery.
983 : *
984 : * See if there is a recovery command file (recovery.conf), and if so
985 : * throw an ERROR since as of PG12 we no longer recognize that.
986 : */
987 : static void
417 heikki.linnakangas 988 GIC 1176 : readRecoverySignalFile(void)
989 : {
417 heikki.linnakangas 990 ECB : struct stat stat_buf;
991 :
417 heikki.linnakangas 992 GIC 1176 : if (IsBootstrapProcessingMode())
993 1103 : return;
417 heikki.linnakangas 994 ECB :
995 : /*
996 : * Check for old recovery API file: recovery.conf
997 : */
417 heikki.linnakangas 998 GIC 871 : if (stat(RECOVERY_COMMAND_FILE, &stat_buf) == 0)
417 heikki.linnakangas 999 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1000 ECB : (errcode_for_file_access(),
417 heikki.linnakangas 1001 EUB : errmsg("using recovery command file \"%s\" is not supported",
1002 : RECOVERY_COMMAND_FILE)));
1003 :
1004 : /*
1005 : * Remove unused .done file, if present. Ignore if absent.
1006 : */
417 heikki.linnakangas 1007 GIC 871 : unlink(RECOVERY_COMMAND_DONE);
1008 :
417 heikki.linnakangas 1009 ECB : /*
1010 : * Check for recovery signal files and if found, fsync them since they
1011 : * represent server state information. We don't sweat too much about the
1012 : * possibility of fsync failure, however.
1013 : *
1014 : * If present, standby signal file takes precedence. If neither is present
1015 : * then we won't enter archive recovery.
1016 : */
417 heikki.linnakangas 1017 GIC 871 : if (stat(STANDBY_SIGNAL_FILE, &stat_buf) == 0)
1018 : {
417 heikki.linnakangas 1019 ECB : int fd;
1020 :
417 heikki.linnakangas 1021 GIC 68 : fd = BasicOpenFilePerm(STANDBY_SIGNAL_FILE, O_RDWR | PG_BINARY,
1022 : S_IRUSR | S_IWUSR);
417 heikki.linnakangas 1023 CBC 68 : if (fd >= 0)
1024 : {
1025 68 : (void) pg_fsync(fd);
417 heikki.linnakangas 1026 GIC 68 : close(fd);
417 heikki.linnakangas 1027 ECB : }
417 heikki.linnakangas 1028 CBC 68 : standby_signal_file_found = true;
1029 : }
1030 803 : else if (stat(RECOVERY_SIGNAL_FILE, &stat_buf) == 0)
1031 : {
417 heikki.linnakangas 1032 ECB : int fd;
1033 :
417 heikki.linnakangas 1034 GIC 5 : fd = BasicOpenFilePerm(RECOVERY_SIGNAL_FILE, O_RDWR | PG_BINARY,
1035 : S_IRUSR | S_IWUSR);
417 heikki.linnakangas 1036 CBC 5 : if (fd >= 0)
1037 : {
1038 5 : (void) pg_fsync(fd);
417 heikki.linnakangas 1039 GIC 5 : close(fd);
417 heikki.linnakangas 1040 ECB : }
417 heikki.linnakangas 1041 CBC 5 : recovery_signal_file_found = true;
1042 : }
417 heikki.linnakangas 1043 ECB :
417 heikki.linnakangas 1044 GIC 871 : StandbyModeRequested = false;
1045 871 : ArchiveRecoveryRequested = false;
417 heikki.linnakangas 1046 CBC 871 : if (standby_signal_file_found)
417 heikki.linnakangas 1047 ECB : {
417 heikki.linnakangas 1048 CBC 68 : StandbyModeRequested = true;
417 heikki.linnakangas 1049 GIC 68 : ArchiveRecoveryRequested = true;
417 heikki.linnakangas 1050 ECB : }
417 heikki.linnakangas 1051 CBC 803 : else if (recovery_signal_file_found)
1052 : {
1053 5 : StandbyModeRequested = false;
417 heikki.linnakangas 1054 GIC 5 : ArchiveRecoveryRequested = true;
417 heikki.linnakangas 1055 ECB : }
1056 : else
417 heikki.linnakangas 1057 GIC 798 : return;
1058 :
417 heikki.linnakangas 1059 ECB : /*
1060 : * We don't support standby mode in standalone backends; that requires
1061 : * other processes such as the WAL receiver to be alive.
1062 : */
417 heikki.linnakangas 1063 GIC 73 : if (StandbyModeRequested && !IsUnderPostmaster)
417 heikki.linnakangas 1064 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1065 ECB : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
417 heikki.linnakangas 1066 EUB : errmsg("standby mode is not supported by single-user servers")));
1067 : }
1068 :
1069 : static void
417 heikki.linnakangas 1070 GIC 1176 : validateRecoveryParameters(void)
1071 : {
417 heikki.linnakangas 1072 CBC 1176 : if (!ArchiveRecoveryRequested)
417 heikki.linnakangas 1073 GIC 1103 : return;
417 heikki.linnakangas 1074 ECB :
1075 : /*
1076 : * Check for compulsory parameters
1077 : */
417 heikki.linnakangas 1078 GIC 73 : if (StandbyModeRequested)
1079 : {
417 heikki.linnakangas 1080 CBC 68 : if ((PrimaryConnInfo == NULL || strcmp(PrimaryConnInfo, "") == 0) &&
417 heikki.linnakangas 1081 GIC 6 : (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0))
417 heikki.linnakangas 1082 CBC 2 : ereport(WARNING,
417 heikki.linnakangas 1083 ECB : (errmsg("specified neither primary_conninfo nor restore_command"),
1084 : errhint("The database server will regularly poll the pg_wal subdirectory to check for files placed there.")));
1085 : }
1086 : else
1087 : {
417 heikki.linnakangas 1088 GIC 5 : if (recoveryRestoreCommand == NULL ||
1089 5 : strcmp(recoveryRestoreCommand, "") == 0)
417 heikki.linnakangas 1090 LBC 0 : ereport(FATAL,
417 heikki.linnakangas 1091 ECB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
417 heikki.linnakangas 1092 EUB : errmsg("must specify restore_command when standby mode is not enabled")));
1093 : }
1094 :
1095 : /*
1096 : * Override any inconsistent requests. Note that this is a change of
1097 : * behaviour in 9.5; prior to this we simply ignored a request to pause if
1098 : * hot_standby = off, which was surprising behaviour.
1099 : */
417 heikki.linnakangas 1100 GIC 73 : if (recoveryTargetAction == RECOVERY_TARGET_ACTION_PAUSE &&
1101 70 : !EnableHotStandby)
417 heikki.linnakangas 1102 CBC 2 : recoveryTargetAction = RECOVERY_TARGET_ACTION_SHUTDOWN;
417 heikki.linnakangas 1103 ECB :
1104 : /*
1105 : * Final parsing of recovery_target_time string; see also
1106 : * check_recovery_target_time().
1107 : */
417 heikki.linnakangas 1108 GIC 73 : if (recoveryTarget == RECOVERY_TARGET_TIME)
1109 : {
417 heikki.linnakangas 1110 LBC 0 : recoveryTargetTime = DatumGetTimestampTz(DirectFunctionCall3(timestamptz_in,
1111 : CStringGetDatum(recovery_target_time_string),
417 heikki.linnakangas 1112 EUB : ObjectIdGetDatum(InvalidOid),
1113 : Int32GetDatum(-1)));
1114 : }
1115 :
1116 : /*
1117 : * If user specified recovery_target_timeline, validate it or compute the
1118 : * "latest" value. We can't do this until after we've gotten the restore
1119 : * command and set InArchiveRecovery, because we need to fetch timeline
1120 : * history files from the archive.
1121 : */
417 heikki.linnakangas 1122 GIC 73 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_NUMERIC)
1123 : {
417 heikki.linnakangas 1124 LBC 0 : TimeLineID rtli = recoveryTargetTLIRequested;
1125 :
417 heikki.linnakangas 1126 EUB : /* Timeline 1 does not have a history file, all else should */
417 heikki.linnakangas 1127 UIC 0 : if (rtli != 1 && !existsTimeLineHistory(rtli))
1128 0 : ereport(FATAL,
417 heikki.linnakangas 1129 EUB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1130 : errmsg("recovery target timeline %u does not exist",
1131 : rtli)));
417 heikki.linnakangas 1132 UIC 0 : recoveryTargetTLI = rtli;
1133 : }
417 heikki.linnakangas 1134 GBC 73 : else if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
1135 : {
417 heikki.linnakangas 1136 ECB : /* We start the "latest" search from pg_control's timeline */
417 heikki.linnakangas 1137 GIC 73 : recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
1138 : }
417 heikki.linnakangas 1139 ECB : else
1140 : {
1141 : /*
1142 : * else we just use the recoveryTargetTLI as already read from
1143 : * ControlFile
1144 : */
417 heikki.linnakangas 1145 UIC 0 : Assert(recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_CONTROLFILE);
1146 : }
417 heikki.linnakangas 1147 EUB : }
1148 :
1149 : /*
1150 : * read_backup_label: check to see if a backup_label file is present
1151 : *
1152 : * If we see a backup_label during recovery, we assume that we are recovering
1153 : * from a backup dump file, and we therefore roll forward from the checkpoint
1154 : * identified by the label file, NOT what pg_control says. This avoids the
1155 : * problem that pg_control might have been archived one or more checkpoints
1156 : * later than the start of the dump, and so if we rely on it as the start
1157 : * point, we will fail to restore a consistent database state.
1158 : *
1159 : * Returns true if a backup_label was found (and fills the checkpoint
1160 : * location and TLI into *checkPointLoc and *backupLabelTLI, respectively);
1161 : * returns false if not. If this backup_label came from a streamed backup,
1162 : * *backupEndRequired is set to true. If this backup_label was created during
1163 : * recovery, *backupFromStandby is set to true.
1164 : *
1165 : * Also sets the global variables RedoStartLSN and RedoStartTLI with the LSN
1166 : * and TLI read from the backup file.
1167 : */
1168 : static bool
417 heikki.linnakangas 1169 GIC 1176 : read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
1170 : bool *backupEndRequired, bool *backupFromStandby)
417 heikki.linnakangas 1171 ECB : {
1172 : char startxlogfilename[MAXFNAMELEN];
1173 : TimeLineID tli_from_walseg,
1174 : tli_from_file;
1175 : FILE *lfp;
1176 : char ch;
1177 : char backuptype[20];
1178 : char backupfrom[20];
1179 : char backuplabel[MAXPGPATH];
1180 : char backuptime[128];
1181 : uint32 hi,
1182 : lo;
1183 :
1184 : /* suppress possible uninitialized-variable warnings */
417 heikki.linnakangas 1185 GIC 1176 : *checkPointLoc = InvalidXLogRecPtr;
1186 1176 : *backupLabelTLI = 0;
417 heikki.linnakangas 1187 CBC 1176 : *backupEndRequired = false;
1188 1176 : *backupFromStandby = false;
417 heikki.linnakangas 1189 ECB :
1190 : /*
1191 : * See if label file is present
1192 : */
417 heikki.linnakangas 1193 GIC 1176 : lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
1194 1176 : if (!lfp)
417 heikki.linnakangas 1195 ECB : {
417 heikki.linnakangas 1196 CBC 1125 : if (errno != ENOENT)
417 heikki.linnakangas 1197 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1198 ECB : (errcode_for_file_access(),
417 heikki.linnakangas 1199 EUB : errmsg("could not read file \"%s\": %m",
1200 : BACKUP_LABEL_FILE)));
417 heikki.linnakangas 1201 GIC 1125 : return false; /* it's not there, all is fine */
1202 : }
417 heikki.linnakangas 1203 ECB :
1204 : /*
1205 : * Read and parse the START WAL LOCATION and CHECKPOINT lines (this code
1206 : * is pretty crude, but we are not expecting any variability in the file
1207 : * format).
1208 : */
417 heikki.linnakangas 1209 GIC 51 : if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c",
1210 51 : &hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n')
417 heikki.linnakangas 1211 LBC 0 : ereport(FATAL,
417 heikki.linnakangas 1212 ECB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
417 heikki.linnakangas 1213 EUB : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
417 heikki.linnakangas 1214 GIC 51 : RedoStartLSN = ((uint64) hi) << 32 | lo;
1215 51 : RedoStartTLI = tli_from_walseg;
417 heikki.linnakangas 1216 CBC 51 : if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c",
1217 51 : &hi, &lo, &ch) != 3 || ch != '\n')
417 heikki.linnakangas 1218 LBC 0 : ereport(FATAL,
417 heikki.linnakangas 1219 ECB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
417 heikki.linnakangas 1220 EUB : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
417 heikki.linnakangas 1221 GIC 51 : *checkPointLoc = ((uint64) hi) << 32 | lo;
1222 51 : *backupLabelTLI = tli_from_walseg;
417 heikki.linnakangas 1223 ECB :
1224 : /*
1225 : * BACKUP METHOD lets us know if this was a typical backup ("streamed",
1226 : * which could mean either pg_basebackup or the pg_backup_start/stop
1227 : * method was used) or if this label came from somewhere else (the only
1228 : * other option today being from pg_rewind). If this was a streamed
1229 : * backup then we know that we need to play through until we get to the
1230 : * end of the WAL which was generated during the backup (at which point we
1231 : * will have reached consistency and backupEndRequired will be reset to be
1232 : * false).
1233 : */
417 heikki.linnakangas 1234 GIC 51 : if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
1235 : {
417 heikki.linnakangas 1236 CBC 51 : if (strcmp(backuptype, "streamed") == 0)
417 heikki.linnakangas 1237 GIC 50 : *backupEndRequired = true;
417 heikki.linnakangas 1238 ECB : }
1239 :
1240 : /*
1241 : * BACKUP FROM lets us know if this was from a primary or a standby. If
1242 : * it was from a standby, we'll double-check that the control file state
1243 : * matches that of a standby.
1244 : */
417 heikki.linnakangas 1245 GIC 51 : if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1)
1246 : {
417 heikki.linnakangas 1247 CBC 51 : if (strcmp(backupfrom, "standby") == 0)
417 heikki.linnakangas 1248 GIC 2 : *backupFromStandby = true;
417 heikki.linnakangas 1249 ECB : }
1250 :
1251 : /*
1252 : * Parse START TIME and LABEL. Those are not mandatory fields for recovery
1253 : * but checking for their presence is useful for debugging and the next
1254 : * sanity checks. Cope also with the fact that the result buffers have a
1255 : * pre-allocated size, hence if the backup_label file has been generated
1256 : * with strings longer than the maximum assumed here an incorrect parsing
1257 : * happens. That's fine as only minor consistency checks are done
1258 : * afterwards.
1259 : */
417 heikki.linnakangas 1260 GIC 51 : if (fscanf(lfp, "START TIME: %127[^\n]\n", backuptime) == 1)
1261 51 : ereport(DEBUG1,
417 heikki.linnakangas 1262 ECB : (errmsg_internal("backup time %s in file \"%s\"",
1263 : backuptime, BACKUP_LABEL_FILE)));
1264 :
417 heikki.linnakangas 1265 GIC 51 : if (fscanf(lfp, "LABEL: %1023[^\n]\n", backuplabel) == 1)
1266 50 : ereport(DEBUG1,
417 heikki.linnakangas 1267 ECB : (errmsg_internal("backup label %s in file \"%s\"",
1268 : backuplabel, BACKUP_LABEL_FILE)));
1269 :
1270 : /*
1271 : * START TIMELINE is new as of 11. Its parsing is not mandatory, still use
1272 : * it as a sanity check if present.
1273 : */
417 heikki.linnakangas 1274 GIC 51 : if (fscanf(lfp, "START TIMELINE: %u\n", &tli_from_file) == 1)
1275 : {
417 heikki.linnakangas 1276 CBC 50 : if (tli_from_walseg != tli_from_file)
417 heikki.linnakangas 1277 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1278 ECB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
417 heikki.linnakangas 1279 EUB : errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE),
1280 : errdetail("Timeline ID parsed is %u, but expected %u.",
1281 : tli_from_file, tli_from_walseg)));
1282 :
417 heikki.linnakangas 1283 GIC 50 : ereport(DEBUG1,
1284 : (errmsg_internal("backup timeline %u in file \"%s\"",
417 heikki.linnakangas 1285 ECB : tli_from_file, BACKUP_LABEL_FILE)));
1286 : }
1287 :
417 heikki.linnakangas 1288 GIC 51 : if (ferror(lfp) || FreeFile(lfp))
417 heikki.linnakangas 1289 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1290 ECB : (errcode_for_file_access(),
417 heikki.linnakangas 1291 EUB : errmsg("could not read file \"%s\": %m",
1292 : BACKUP_LABEL_FILE)));
1293 :
417 heikki.linnakangas 1294 GIC 51 : return true;
1295 : }
417 heikki.linnakangas 1296 ECB :
1297 : /*
1298 : * read_tablespace_map: check to see if a tablespace_map file is present
1299 : *
1300 : * If we see a tablespace_map file during recovery, we assume that we are
1301 : * recovering from a backup dump file, and we therefore need to create symlinks
1302 : * as per the information present in tablespace_map file.
1303 : *
1304 : * Returns true if a tablespace_map file was found (and fills *tablespaces
1305 : * with a tablespaceinfo struct for each tablespace listed in the file);
1306 : * returns false if not.
1307 : */
1308 : static bool
417 heikki.linnakangas 1309 GIC 51 : read_tablespace_map(List **tablespaces)
1310 : {
417 heikki.linnakangas 1311 ECB : tablespaceinfo *ti;
1312 : FILE *lfp;
1313 : char str[MAXPGPATH];
1314 : int ch,
1315 : i,
1316 : n;
1317 : bool was_backslash;
1318 :
1319 : /*
1320 : * See if tablespace_map file is present
1321 : */
417 heikki.linnakangas 1322 GIC 51 : lfp = AllocateFile(TABLESPACE_MAP, "r");
1323 51 : if (!lfp)
417 heikki.linnakangas 1324 ECB : {
417 heikki.linnakangas 1325 CBC 50 : if (errno != ENOENT)
417 heikki.linnakangas 1326 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1327 ECB : (errcode_for_file_access(),
417 heikki.linnakangas 1328 EUB : errmsg("could not read file \"%s\": %m",
1329 : TABLESPACE_MAP)));
417 heikki.linnakangas 1330 GIC 50 : return false; /* it's not there, all is fine */
1331 : }
417 heikki.linnakangas 1332 ECB :
1333 : /*
1334 : * Read and parse the link name and path lines from tablespace_map file
1335 : * (this code is pretty crude, but we are not expecting any variability in
1336 : * the file format). De-escape any backslashes that were inserted.
1337 : */
417 heikki.linnakangas 1338 GIC 1 : i = 0;
1339 1 : was_backslash = false;
417 heikki.linnakangas 1340 CBC 46 : while ((ch = fgetc(lfp)) != EOF)
417 heikki.linnakangas 1341 ECB : {
417 heikki.linnakangas 1342 CBC 45 : if (!was_backslash && (ch == '\n' || ch == '\r'))
1343 : {
1344 1 : if (i == 0)
417 heikki.linnakangas 1345 UIC 0 : continue; /* \r immediately followed by \n */
417 heikki.linnakangas 1346 ECB :
417 heikki.linnakangas 1347 EUB : /*
1348 : * The de-escaped line should contain an OID followed by exactly
1349 : * one space followed by a path. The path might start with
1350 : * spaces, so don't be too liberal about parsing.
1351 : */
417 heikki.linnakangas 1352 GIC 1 : str[i] = '\0';
1353 1 : n = 0;
417 heikki.linnakangas 1354 CBC 6 : while (str[n] && str[n] != ' ')
1355 5 : n++;
1356 1 : if (n < 1 || n >= i - 1)
417 heikki.linnakangas 1357 LBC 0 : ereport(FATAL,
417 heikki.linnakangas 1358 ECB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
417 heikki.linnakangas 1359 EUB : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
417 heikki.linnakangas 1360 GIC 1 : str[n++] = '\0';
1361 :
417 heikki.linnakangas 1362 CBC 1 : ti = palloc0(sizeof(tablespaceinfo));
417 heikki.linnakangas 1363 GIC 1 : ti->oid = pstrdup(str);
417 heikki.linnakangas 1364 CBC 1 : ti->path = pstrdup(str + n);
1365 1 : *tablespaces = lappend(*tablespaces, ti);
417 heikki.linnakangas 1366 ECB :
417 heikki.linnakangas 1367 CBC 1 : i = 0;
417 heikki.linnakangas 1368 GIC 1 : continue;
417 heikki.linnakangas 1369 ECB : }
417 heikki.linnakangas 1370 CBC 44 : else if (!was_backslash && ch == '\\')
417 heikki.linnakangas 1371 UIC 0 : was_backslash = true;
417 heikki.linnakangas 1372 ECB : else
417 heikki.linnakangas 1373 EUB : {
417 heikki.linnakangas 1374 GIC 44 : if (i < sizeof(str) - 1)
1375 44 : str[i++] = ch;
417 heikki.linnakangas 1376 CBC 44 : was_backslash = false;
417 heikki.linnakangas 1377 ECB : }
1378 : }
1379 :
417 heikki.linnakangas 1380 GIC 1 : if (i != 0 || was_backslash) /* last line not terminated? */
417 heikki.linnakangas 1381 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1382 ECB : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
417 heikki.linnakangas 1383 EUB : errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
1384 :
417 heikki.linnakangas 1385 GIC 1 : if (ferror(lfp) || FreeFile(lfp))
417 heikki.linnakangas 1386 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1387 ECB : (errcode_for_file_access(),
417 heikki.linnakangas 1388 EUB : errmsg("could not read file \"%s\": %m",
1389 : TABLESPACE_MAP)));
1390 :
417 heikki.linnakangas 1391 GIC 1 : return true;
1392 : }
417 heikki.linnakangas 1393 ECB :
1394 : /*
1395 : * Finish WAL recovery.
1396 : *
1397 : * This does not close the 'xlogreader' yet, because in some cases the caller
1398 : * still wants to re-read the last checkpoint record by calling
1399 : * ReadCheckPointRecord().
1400 : *
1401 : * Returns the position of the last valid or applied record, after which new
1402 : * WAL should be appended, information about why recovery was ended, and some
1403 : * other things. See the WalRecoveryResult struct for details.
1404 : */
1405 : EndOfWalRecoveryInfo *
417 heikki.linnakangas 1406 GIC 1142 : FinishWalRecovery(void)
1407 : {
417 heikki.linnakangas 1408 CBC 1142 : EndOfWalRecoveryInfo *result = palloc(sizeof(EndOfWalRecoveryInfo));
1409 : XLogRecPtr lastRec;
417 heikki.linnakangas 1410 ECB : TimeLineID lastRecTLI;
1411 : XLogRecPtr endOfLog;
1412 :
1413 : /*
1414 : * Kill WAL receiver, if it's still running, before we continue to write
1415 : * the startup checkpoint and aborted-contrecord records. It will trump
1416 : * over these records and subsequent ones if it's still alive when we
1417 : * start writing WAL.
1418 : */
417 heikki.linnakangas 1419 GIC 1142 : XLogShutdownWalRcv();
1420 :
417 heikki.linnakangas 1421 ECB : /*
1422 : * We are now done reading the xlog from stream. Turn off streaming
1423 : * recovery to force fetching the files (which would be required at end of
1424 : * recovery, e.g., timeline history file) from archive or pg_wal.
1425 : *
1426 : * Note that standby mode must be turned off after killing WAL receiver,
1427 : * i.e., calling XLogShutdownWalRcv().
1428 : */
417 heikki.linnakangas 1429 GIC 1142 : Assert(!WalRcvStreaming());
1430 1142 : StandbyMode = false;
417 heikki.linnakangas 1431 ECB :
1432 : /*
1433 : * Determine where to start writing WAL next.
1434 : *
1435 : * Re-fetch the last valid or last applied record, so we can identify the
1436 : * exact endpoint of what we consider the valid portion of WAL. There may
1437 : * be an incomplete continuation record after that, in which case
1438 : * 'abortedRecPtr' and 'missingContrecPtr' are set and the caller will
1439 : * write a special OVERWRITE_CONTRECORD message to mark that the rest of
1440 : * it is intentionally missing. See CreateOverwriteContrecordRecord().
1441 : *
1442 : * An important side-effect of this is to load the last page into
1443 : * xlogreader. The caller uses it to initialize the WAL for writing.
1444 : */
417 heikki.linnakangas 1445 GIC 1142 : if (!InRecovery)
1446 : {
417 heikki.linnakangas 1447 CBC 1025 : lastRec = CheckPointLoc;
417 heikki.linnakangas 1448 GIC 1025 : lastRecTLI = CheckPointTLI;
417 heikki.linnakangas 1449 ECB : }
1450 : else
1451 : {
417 heikki.linnakangas 1452 GIC 117 : lastRec = XLogRecoveryCtl->lastReplayedReadRecPtr;
1453 117 : lastRecTLI = XLogRecoveryCtl->lastReplayedTLI;
417 heikki.linnakangas 1454 ECB : }
367 tmunro 1455 CBC 1142 : XLogPrefetcherBeginRead(xlogprefetcher, lastRec);
367 tmunro 1456 GIC 1142 : (void) ReadRecord(xlogprefetcher, PANIC, false, lastRecTLI);
417 heikki.linnakangas 1457 CBC 1142 : endOfLog = xlogreader->EndRecPtr;
417 heikki.linnakangas 1458 ECB :
1459 : /*
1460 : * Remember the TLI in the filename of the XLOG segment containing the
1461 : * end-of-log. It could be different from the timeline that endOfLog
1462 : * nominally belongs to, if there was a timeline switch in that segment,
1463 : * and we were reading the old WAL from a segment belonging to a higher
1464 : * timeline.
1465 : */
417 heikki.linnakangas 1466 GIC 1142 : result->endOfLogTLI = xlogreader->seg.ws_tli;
1467 :
417 heikki.linnakangas 1468 CBC 1142 : if (ArchiveRecoveryRequested)
1469 : {
417 heikki.linnakangas 1470 ECB : /*
1471 : * We are no longer in archive recovery state.
1472 : *
1473 : * We are now done reading the old WAL. Turn off archive fetching if
1474 : * it was active.
1475 : */
417 heikki.linnakangas 1476 GIC 39 : Assert(InArchiveRecovery);
1477 39 : InArchiveRecovery = false;
417 heikki.linnakangas 1478 ECB :
1479 : /*
1480 : * If the ending log segment is still open, close it (to avoid
1481 : * problems on Windows with trying to rename or delete an open file).
1482 : */
417 heikki.linnakangas 1483 GIC 39 : if (readFile >= 0)
1484 : {
417 heikki.linnakangas 1485 CBC 39 : close(readFile);
417 heikki.linnakangas 1486 GIC 39 : readFile = -1;
417 heikki.linnakangas 1487 ECB : }
1488 : }
1489 :
1490 : /*
1491 : * Copy the last partial block to the caller, for initializing the WAL
1492 : * buffer for appending new WAL.
1493 : */
417 heikki.linnakangas 1494 GIC 1142 : if (endOfLog % XLOG_BLCKSZ != 0)
1495 : {
417 heikki.linnakangas 1496 ECB : char *page;
1497 : int len;
1498 : XLogRecPtr pageBeginPtr;
1499 :
417 heikki.linnakangas 1500 GIC 1129 : pageBeginPtr = endOfLog - (endOfLog % XLOG_BLCKSZ);
1501 1129 : Assert(readOff == XLogSegmentOffset(pageBeginPtr, wal_segment_size));
417 heikki.linnakangas 1502 ECB :
1503 : /* Copy the valid part of the last block */
417 heikki.linnakangas 1504 GIC 1129 : len = endOfLog % XLOG_BLCKSZ;
1505 1129 : page = palloc(len);
417 heikki.linnakangas 1506 CBC 1129 : memcpy(page, xlogreader->readBuf, len);
417 heikki.linnakangas 1507 ECB :
417 heikki.linnakangas 1508 CBC 1129 : result->lastPageBeginPtr = pageBeginPtr;
417 heikki.linnakangas 1509 GIC 1129 : result->lastPage = page;
417 heikki.linnakangas 1510 ECB : }
1511 : else
1512 : {
1513 : /* There is no partial block to copy. */
417 heikki.linnakangas 1514 GIC 13 : result->lastPageBeginPtr = endOfLog;
1515 13 : result->lastPage = NULL;
417 heikki.linnakangas 1516 ECB : }
1517 :
1518 : /*
1519 : * Create a comment for the history file to explain why and where timeline
1520 : * changed.
1521 : */
417 heikki.linnakangas 1522 GIC 1142 : result->recoveryStopReason = getRecoveryStopReason();
1523 :
417 heikki.linnakangas 1524 CBC 1142 : result->lastRec = lastRec;
417 heikki.linnakangas 1525 GIC 1142 : result->lastRecTLI = lastRecTLI;
417 heikki.linnakangas 1526 CBC 1142 : result->endOfLog = endOfLog;
417 heikki.linnakangas 1527 ECB :
417 heikki.linnakangas 1528 CBC 1142 : result->abortedRecPtr = abortedRecPtr;
417 heikki.linnakangas 1529 GIC 1142 : result->missingContrecPtr = missingContrecPtr;
417 heikki.linnakangas 1530 ECB :
417 heikki.linnakangas 1531 CBC 1142 : result->standby_signal_file_found = standby_signal_file_found;
417 heikki.linnakangas 1532 GIC 1142 : result->recovery_signal_file_found = recovery_signal_file_found;
417 heikki.linnakangas 1533 ECB :
417 heikki.linnakangas 1534 CBC 1142 : return result;
1535 : }
417 heikki.linnakangas 1536 ECB :
1537 : /*
1538 : * Clean up the WAL reader and leftovers from restoring WAL from archive
1539 : */
1540 : void
417 heikki.linnakangas 1541 GIC 1142 : ShutdownWalRecovery(void)
1542 : {
417 heikki.linnakangas 1543 ECB : char recoveryPath[MAXPGPATH];
1544 :
1545 : /* Final update of pg_stat_recovery_prefetch. */
367 tmunro 1546 GIC 1142 : XLogPrefetcherComputeStats(xlogprefetcher);
1547 :
417 heikki.linnakangas 1548 ECB : /* Shut down xlogreader */
417 heikki.linnakangas 1549 GIC 1142 : if (readFile >= 0)
1550 : {
417 heikki.linnakangas 1551 CBC 1103 : close(readFile);
417 heikki.linnakangas 1552 GIC 1103 : readFile = -1;
417 heikki.linnakangas 1553 ECB : }
417 heikki.linnakangas 1554 CBC 1142 : XLogReaderFree(xlogreader);
367 tmunro 1555 GIC 1142 : XLogPrefetcherFree(xlogprefetcher);
417 heikki.linnakangas 1556 ECB :
417 heikki.linnakangas 1557 CBC 1142 : if (ArchiveRecoveryRequested)
1558 : {
417 heikki.linnakangas 1559 ECB : /*
1560 : * Since there might be a partial WAL segment named RECOVERYXLOG, get
1561 : * rid of it.
1562 : */
417 heikki.linnakangas 1563 GIC 39 : snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYXLOG");
1564 39 : unlink(recoveryPath); /* ignore any error */
417 heikki.linnakangas 1565 ECB :
1566 : /* Get rid of any remaining recovered timeline-history file, too */
417 heikki.linnakangas 1567 GIC 39 : snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYHISTORY");
1568 39 : unlink(recoveryPath); /* ignore any error */
417 heikki.linnakangas 1569 ECB : }
1570 :
1571 : /*
1572 : * We don't need the latch anymore. It's not strictly necessary to disown
1573 : * it, but let's do it for the sake of tidiness.
1574 : */
417 heikki.linnakangas 1575 GIC 1142 : if (ArchiveRecoveryRequested)
1576 39 : DisownLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
417 heikki.linnakangas 1577 CBC 1142 : }
417 heikki.linnakangas 1578 ECB :
1579 : /*
1580 : * Perform WAL recovery.
1581 : *
1582 : * If the system was shut down cleanly, this is never called.
1583 : */
1584 : void
417 heikki.linnakangas 1585 GIC 151 : PerformWalRecovery(void)
1586 : {
417 heikki.linnakangas 1587 ECB : XLogRecord *record;
417 heikki.linnakangas 1588 GIC 151 : bool reachedRecoveryTarget = false;
1589 : TimeLineID replayTLI;
417 heikki.linnakangas 1590 ECB :
1591 : /*
1592 : * Initialize shared variables for tracking progress of WAL replay, as if
1593 : * we had just replayed the record before the REDO location (or the
1594 : * checkpoint record itself, if it's a shutdown checkpoint).
1595 : */
417 heikki.linnakangas 1596 GIC 151 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
1597 151 : if (RedoStartLSN < CheckPointLoc)
417 heikki.linnakangas 1598 ECB : {
417 heikki.linnakangas 1599 CBC 79 : XLogRecoveryCtl->lastReplayedReadRecPtr = InvalidXLogRecPtr;
417 heikki.linnakangas 1600 GIC 79 : XLogRecoveryCtl->lastReplayedEndRecPtr = RedoStartLSN;
417 heikki.linnakangas 1601 CBC 79 : XLogRecoveryCtl->lastReplayedTLI = RedoStartTLI;
417 heikki.linnakangas 1602 ECB : }
1603 : else
1604 : {
417 heikki.linnakangas 1605 GIC 72 : XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
1606 72 : XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
417 heikki.linnakangas 1607 CBC 72 : XLogRecoveryCtl->lastReplayedTLI = CheckPointTLI;
417 heikki.linnakangas 1608 ECB : }
417 heikki.linnakangas 1609 CBC 151 : XLogRecoveryCtl->replayEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
417 heikki.linnakangas 1610 GIC 151 : XLogRecoveryCtl->replayEndTLI = XLogRecoveryCtl->lastReplayedTLI;
417 heikki.linnakangas 1611 CBC 151 : XLogRecoveryCtl->recoveryLastXTime = 0;
1612 151 : XLogRecoveryCtl->currentChunkStartTime = 0;
1613 151 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
1614 151 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 1615 ECB :
1616 : /* Also ensure XLogReceiptTime has a sane value */
417 heikki.linnakangas 1617 GIC 151 : XLogReceiptTime = GetCurrentTimestamp();
1618 :
417 heikki.linnakangas 1619 ECB : /*
1620 : * Let postmaster know we've started redo now, so that it can launch the
1621 : * archiver if necessary.
1622 : */
417 heikki.linnakangas 1623 GIC 151 : if (IsUnderPostmaster)
1624 142 : SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
417 heikki.linnakangas 1625 ECB :
1626 : /*
1627 : * Allow read-only connections immediately if we're consistent already.
1628 : */
417 heikki.linnakangas 1629 GIC 151 : CheckRecoveryConsistency();
1630 :
417 heikki.linnakangas 1631 ECB : /*
1632 : * Find the first record that logically follows the checkpoint --- it
1633 : * might physically precede it, though.
1634 : */
417 heikki.linnakangas 1635 GIC 151 : if (RedoStartLSN < CheckPointLoc)
1636 : {
417 heikki.linnakangas 1637 ECB : /* back up to find the record */
417 heikki.linnakangas 1638 GIC 79 : replayTLI = RedoStartTLI;
367 tmunro 1639 79 : XLogPrefetcherBeginRead(xlogprefetcher, RedoStartLSN);
367 tmunro 1640 CBC 79 : record = ReadRecord(xlogprefetcher, PANIC, false, replayTLI);
417 heikki.linnakangas 1641 ECB : }
1642 : else
1643 : {
1644 : /* just have to read next record after CheckPoint */
417 heikki.linnakangas 1645 GIC 72 : Assert(xlogreader->ReadRecPtr == CheckPointLoc);
1646 72 : replayTLI = CheckPointTLI;
367 tmunro 1647 CBC 72 : record = ReadRecord(xlogprefetcher, LOG, false, replayTLI);
417 heikki.linnakangas 1648 ECB : }
1649 :
417 heikki.linnakangas 1650 GIC 151 : if (record != NULL)
1651 : {
417 heikki.linnakangas 1652 ECB : TimestampTz xtime;
1653 : PGRUsage ru0;
1654 :
417 heikki.linnakangas 1655 GIC 141 : pg_rusage_init(&ru0);
1656 :
417 heikki.linnakangas 1657 CBC 141 : InRedo = true;
1658 :
368 jdavis 1659 141 : RmgrStartup();
1660 :
417 heikki.linnakangas 1661 141 : ereport(LOG,
1662 : (errmsg("redo starts at %X/%X",
417 heikki.linnakangas 1663 ECB : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
1664 :
1665 : /* Prepare to report progress of the redo phase. */
417 heikki.linnakangas 1666 GIC 141 : if (!StandbyMode)
1667 78 : begin_startup_progress_phase();
417 heikki.linnakangas 1668 ECB :
1669 : /*
1670 : * main redo apply loop
1671 : */
1672 : do
1673 : {
417 heikki.linnakangas 1674 GIC 2504088 : if (!StandbyMode)
1675 238436 : ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
417 heikki.linnakangas 1676 ECB : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
1677 :
1678 : #ifdef WAL_DEBUG
1679 : if (XLOG_DEBUG ||
1680 : (record->xl_rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
1681 : (record->xl_rmid != RM_XACT_ID && trace_recovery_messages <= DEBUG3))
1682 : {
1683 : StringInfoData buf;
1684 :
1685 : initStringInfo(&buf);
1686 : appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
1687 : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
1688 : LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
1689 : xlog_outrec(&buf, xlogreader);
1690 : appendStringInfoString(&buf, " - ");
1691 : xlog_outdesc(&buf, xlogreader);
1692 : elog(LOG, "%s", buf.data);
1693 : pfree(buf.data);
1694 : }
1695 : #endif
1696 :
1697 : /* Handle interrupt signals of startup process */
417 heikki.linnakangas 1698 GIC 2504088 : HandleStartupProcInterrupts();
1699 :
417 heikki.linnakangas 1700 ECB : /*
1701 : * Pause WAL replay, if requested by a hot-standby session via
1702 : * SetRecoveryPause().
1703 : *
1704 : * Note that we intentionally don't take the info_lck spinlock
1705 : * here. We might therefore read a slightly stale value of the
1706 : * recoveryPause flag, but it can't be very stale (no worse than
1707 : * the last spinlock we did acquire). Since a pause request is a
1708 : * pretty asynchronous thing anyway, possibly responding to it one
1709 : * WAL record later than we otherwise would is a minor issue, so
1710 : * it doesn't seem worth adding another spinlock cycle to prevent
1711 : * that.
1712 : */
417 heikki.linnakangas 1713 GIC 2504088 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
1714 : RECOVERY_NOT_PAUSED)
417 heikki.linnakangas 1715 LBC 0 : recoveryPausesHere(false);
1716 :
417 heikki.linnakangas 1717 EUB : /*
1718 : * Have we reached our recovery target?
1719 : */
417 heikki.linnakangas 1720 GIC 2504088 : if (recoveryStopsBefore(xlogreader))
1721 : {
417 heikki.linnakangas 1722 LBC 0 : reachedRecoveryTarget = true;
417 heikki.linnakangas 1723 UIC 0 : break;
417 heikki.linnakangas 1724 EUB : }
1725 :
1726 : /*
1727 : * If we've been asked to lag the primary, wait on latch until
1728 : * enough time has passed.
1729 : */
417 heikki.linnakangas 1730 GIC 2504088 : if (recoveryApplyDelay(xlogreader))
1731 : {
417 heikki.linnakangas 1732 ECB : /*
1733 : * We test for paused recovery again here. If user sets
1734 : * delayed apply, it may be because they expect to pause
1735 : * recovery in case of problems, so we must test again here
1736 : * otherwise pausing during the delay-wait wouldn't work.
1737 : */
417 heikki.linnakangas 1738 UIC 0 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
1739 : RECOVERY_NOT_PAUSED)
417 heikki.linnakangas 1740 UBC 0 : recoveryPausesHere(false);
1741 : }
417 heikki.linnakangas 1742 EUB :
1743 : /*
1744 : * Apply the record
1745 : */
417 heikki.linnakangas 1746 GIC 2504088 : ApplyWalRecord(xlogreader, record, &replayTLI);
1747 :
417 heikki.linnakangas 1748 ECB : /* Exit loop if we reached inclusive recovery target */
417 heikki.linnakangas 1749 GIC 2504086 : if (recoveryStopsAfter(xlogreader))
1750 : {
417 heikki.linnakangas 1751 CBC 2 : reachedRecoveryTarget = true;
417 heikki.linnakangas 1752 GIC 2 : break;
417 heikki.linnakangas 1753 ECB : }
1754 :
1755 : /* Else, try to fetch the next WAL record */
367 tmunro 1756 GIC 2504084 : record = ReadRecord(xlogprefetcher, LOG, false, replayTLI);
417 heikki.linnakangas 1757 2504053 : } while (record != NULL);
417 heikki.linnakangas 1758 ECB :
1759 : /*
1760 : * end of main redo apply loop
1761 : */
1762 :
417 heikki.linnakangas 1763 GIC 108 : if (reachedRecoveryTarget)
1764 : {
417 heikki.linnakangas 1765 CBC 2 : if (!reachedConsistency)
417 heikki.linnakangas 1766 UIC 0 : ereport(FATAL,
417 heikki.linnakangas 1767 ECB : (errmsg("requested recovery stop point is before consistent recovery point")));
417 heikki.linnakangas 1768 EUB :
1769 : /*
1770 : * This is the last point where we can restart recovery with a new
1771 : * recovery target, if we shutdown and begin again. After this,
1772 : * Resource Managers may choose to do permanent corrective actions
1773 : * at end of recovery.
1774 : */
417 heikki.linnakangas 1775 GIC 2 : switch (recoveryTargetAction)
1776 : {
417 heikki.linnakangas 1777 LBC 0 : case RECOVERY_TARGET_ACTION_SHUTDOWN:
1778 :
417 heikki.linnakangas 1779 EUB : /*
1780 : * exit with special return code to request shutdown of
1781 : * postmaster. Log messages issued from postmaster.
1782 : */
417 heikki.linnakangas 1783 UIC 0 : proc_exit(3);
1784 :
417 heikki.linnakangas 1785 UBC 0 : case RECOVERY_TARGET_ACTION_PAUSE:
417 heikki.linnakangas 1786 UIC 0 : SetRecoveryPause(true);
417 heikki.linnakangas 1787 UBC 0 : recoveryPausesHere(true);
417 heikki.linnakangas 1788 EUB :
1789 : /* drop into promote */
1790 :
417 heikki.linnakangas 1791 GIC 2 : case RECOVERY_TARGET_ACTION_PROMOTE:
1792 2 : break;
417 heikki.linnakangas 1793 ECB : }
1794 : }
1795 :
368 jdavis 1796 GIC 108 : RmgrCleanup();
1797 :
417 heikki.linnakangas 1798 CBC 108 : ereport(LOG,
1799 : (errmsg("redo done at %X/%X system usage: %s",
417 heikki.linnakangas 1800 ECB : LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
1801 : pg_rusage_show(&ru0))));
417 heikki.linnakangas 1802 GIC 108 : xtime = GetLatestXTime();
1803 108 : if (xtime)
417 heikki.linnakangas 1804 CBC 28 : ereport(LOG,
417 heikki.linnakangas 1805 ECB : (errmsg("last completed transaction was at log time %s",
1806 : timestamptz_to_str(xtime))));
1807 :
417 heikki.linnakangas 1808 GIC 108 : InRedo = false;
1809 : }
417 heikki.linnakangas 1810 ECB : else
1811 : {
1812 : /* there are no WAL records following the checkpoint */
417 heikki.linnakangas 1813 GIC 10 : ereport(LOG,
1814 : (errmsg("redo is not required")));
417 heikki.linnakangas 1815 ECB : }
1816 :
1817 : /*
1818 : * This check is intentionally after the above log messages that indicate
1819 : * how far recovery went.
1820 : */
417 heikki.linnakangas 1821 GIC 118 : if (ArchiveRecoveryRequested &&
1822 40 : recoveryTarget != RECOVERY_TARGET_UNSET &&
417 heikki.linnakangas 1823 CBC 3 : !reachedRecoveryTarget)
1824 1 : ereport(FATAL,
417 heikki.linnakangas 1825 ECB : (errmsg("recovery ended before configured recovery target was reached")));
417 heikki.linnakangas 1826 CBC 117 : }
1827 :
417 heikki.linnakangas 1828 ECB : /*
1829 : * Subroutine of PerformWalRecovery, to apply one WAL record.
1830 : */
1831 : static void
417 heikki.linnakangas 1832 GIC 2504088 : ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI)
1833 : {
417 heikki.linnakangas 1834 ECB : ErrorContextCallback errcallback;
417 heikki.linnakangas 1835 GIC 2504088 : bool switchedTLI = false;
1836 :
417 heikki.linnakangas 1837 ECB : /* Setup error traceback support for ereport() */
417 heikki.linnakangas 1838 GIC 2504088 : errcallback.callback = rm_redo_error_callback;
1839 2504088 : errcallback.arg = (void *) xlogreader;
417 heikki.linnakangas 1840 CBC 2504088 : errcallback.previous = error_context_stack;
1841 2504088 : error_context_stack = &errcallback;
417 heikki.linnakangas 1842 ECB :
1843 : /*
1844 : * ShmemVariableCache->nextXid must be beyond record's xid.
1845 : */
417 heikki.linnakangas 1846 GIC 2504088 : AdvanceNextFullTransactionIdPastXid(record->xl_xid);
1847 :
417 heikki.linnakangas 1848 ECB : /*
1849 : * Before replaying this record, check if this record causes the current
1850 : * timeline to change. The record is already considered to be part of the
1851 : * new timeline, so we update replayTLI before replaying it. That's
1852 : * important so that replayEndTLI, which is recorded as the minimum
1853 : * recovery point's TLI if recovery stops after this record, is set
1854 : * correctly.
1855 : */
417 heikki.linnakangas 1856 GIC 2504088 : if (record->xl_rmid == RM_XLOG_ID)
1857 : {
417 heikki.linnakangas 1858 CBC 28161 : TimeLineID newReplayTLI = *replayTLI;
417 heikki.linnakangas 1859 GIC 28161 : TimeLineID prevReplayTLI = *replayTLI;
417 heikki.linnakangas 1860 CBC 28161 : uint8 info = record->xl_info & ~XLR_INFO_MASK;
417 heikki.linnakangas 1861 ECB :
417 heikki.linnakangas 1862 CBC 28161 : if (info == XLOG_CHECKPOINT_SHUTDOWN)
1863 : {
417 heikki.linnakangas 1864 ECB : CheckPoint checkPoint;
1865 :
417 heikki.linnakangas 1866 GIC 25 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
1867 25 : newReplayTLI = checkPoint.ThisTimeLineID;
417 heikki.linnakangas 1868 CBC 25 : prevReplayTLI = checkPoint.PrevTimeLineID;
417 heikki.linnakangas 1869 ECB : }
417 heikki.linnakangas 1870 CBC 28136 : else if (info == XLOG_END_OF_RECOVERY)
1871 : {
417 heikki.linnakangas 1872 ECB : xl_end_of_recovery xlrec;
1873 :
417 heikki.linnakangas 1874 GIC 8 : memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
1875 8 : newReplayTLI = xlrec.ThisTimeLineID;
417 heikki.linnakangas 1876 CBC 8 : prevReplayTLI = xlrec.PrevTimeLineID;
417 heikki.linnakangas 1877 ECB : }
1878 :
417 heikki.linnakangas 1879 GIC 28161 : if (newReplayTLI != *replayTLI)
1880 : {
417 heikki.linnakangas 1881 ECB : /* Check that it's OK to switch to this TLI */
417 heikki.linnakangas 1882 GIC 9 : checkTimeLineSwitch(xlogreader->EndRecPtr,
1883 : newReplayTLI, prevReplayTLI, *replayTLI);
417 heikki.linnakangas 1884 ECB :
1885 : /* Following WAL records should be run with new TLI */
417 heikki.linnakangas 1886 GIC 9 : *replayTLI = newReplayTLI;
1887 9 : switchedTLI = true;
417 heikki.linnakangas 1888 ECB : }
1889 : }
1890 :
1891 : /*
1892 : * Update shared replayEndRecPtr before replaying this record, so that
1893 : * XLogFlush will update minRecoveryPoint correctly.
1894 : */
417 heikki.linnakangas 1895 GIC 2504088 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
1896 2504088 : XLogRecoveryCtl->replayEndRecPtr = xlogreader->EndRecPtr;
417 heikki.linnakangas 1897 CBC 2504088 : XLogRecoveryCtl->replayEndTLI = *replayTLI;
1898 2504088 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 1899 ECB :
1900 : /*
1901 : * If we are attempting to enter Hot Standby mode, process XIDs we see
1902 : */
417 heikki.linnakangas 1903 GIC 2504088 : if (standbyState >= STANDBY_INITIALIZED &&
1904 2281481 : TransactionIdIsValid(record->xl_xid))
417 heikki.linnakangas 1905 CBC 2242538 : RecordKnownAssignedTransactionIds(record->xl_xid);
417 heikki.linnakangas 1906 ECB :
1907 : /*
1908 : * Some XLOG record types that are related to recovery are processed
1909 : * directly here, rather than in xlog_redo()
1910 : */
417 heikki.linnakangas 1911 GIC 2504088 : if (record->xl_rmid == RM_XLOG_ID)
1912 28161 : xlogrecovery_redo(xlogreader, *replayTLI);
417 heikki.linnakangas 1913 ECB :
1914 : /* Now apply the WAL record itself */
368 jdavis 1915 GIC 2504088 : GetRmgr(record->xl_rmid).rm_redo(xlogreader);
1916 :
417 heikki.linnakangas 1917 ECB : /*
1918 : * After redo, check whether the backup pages associated with the WAL
1919 : * record are consistent with the existing pages. This check is done only
1920 : * if consistency check is enabled for this record.
1921 : */
417 heikki.linnakangas 1922 GIC 2504086 : if ((record->xl_info & XLR_CHECK_CONSISTENCY) != 0)
417 heikki.linnakangas 1923 UIC 0 : verifyBackupPageConsistency(xlogreader);
417 heikki.linnakangas 1924 ECB :
417 heikki.linnakangas 1925 EUB : /* Pop the error context stack */
417 heikki.linnakangas 1926 GIC 2504086 : error_context_stack = errcallback.previous;
1927 :
417 heikki.linnakangas 1928 ECB : /*
1929 : * Update lastReplayedEndRecPtr after this record has been successfully
1930 : * replayed.
1931 : */
417 heikki.linnakangas 1932 GIC 2504086 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
1933 2504086 : XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
417 heikki.linnakangas 1934 CBC 2504086 : XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
1935 2504086 : XLogRecoveryCtl->lastReplayedTLI = *replayTLI;
1936 2504086 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 1937 ECB :
1938 : /* ------
1939 : * Wakeup walsenders:
1940 : *
1941 : * On the standby, the WAL is flushed first (which will only wake up
1942 : * physical walsenders) and then applied, which will only wake up logical
1943 : * walsenders.
1944 : *
1945 : * Indeed, logical walsenders on standby can't decode and send data until
1946 : * it's been applied.
1947 : *
1948 : * Physical walsenders don't need to be woken up during replay unless
1949 : * cascading replication is allowed and time line change occurred (so that
1950 : * they can notice that they are on a new time line).
1951 : *
1952 : * That's why the wake up conditions are for:
1953 : *
1954 : * - physical walsenders in case of new time line and cascade
1955 : * replication is allowed
1956 : * - logical walsenders in case cascade replication is allowed (could not
1957 : * be created otherwise)
1958 : * ------
1959 : */
1 andres 1960 GNC 2504086 : if (AllowCascadeReplication())
1961 2332490 : WalSndWakeup(switchedTLI, true);
1962 :
417 heikki.linnakangas 1963 ECB : /*
1964 : * If rm_redo called XLogRequestWalReceiverReply, then we wake up the
1965 : * receiver so that it notices the updated lastReplayedEndRecPtr and sends
1966 : * a reply to the primary.
1967 : */
417 heikki.linnakangas 1968 GIC 2504086 : if (doRequestWalReceiverReply)
1969 : {
417 heikki.linnakangas 1970 UIC 0 : doRequestWalReceiverReply = false;
1971 0 : WalRcvForceReply();
1972 : }
1973 :
1974 : /* Allow read-only connections if we're consistent now */
417 heikki.linnakangas 1975 GIC 2504086 : CheckRecoveryConsistency();
1976 :
1977 : /* Is this a timeline switch? */
1978 2504086 : if (switchedTLI)
1979 : {
1980 : /*
1981 : * Before we continue on the new timeline, clean up any (possibly
1982 : * bogus) future WAL segments on the old timeline.
1983 : */
1984 9 : RemoveNonParentXlogFiles(xlogreader->EndRecPtr, *replayTLI);
1985 :
1986 : /* Reset the prefetcher. */
367 tmunro 1987 9 : XLogPrefetchReconfigure();
1988 : }
417 heikki.linnakangas 1989 CBC 2504086 : }
1990 :
417 heikki.linnakangas 1991 EUB : /*
1992 : * Some XLOG RM record types that are directly related to WAL recovery are
1993 : * handled here rather than in the xlog_redo()
1994 : */
1995 : static void
417 heikki.linnakangas 1996 CBC 28161 : xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
1997 : {
417 heikki.linnakangas 1998 GIC 28161 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
417 heikki.linnakangas 1999 CBC 28161 : XLogRecPtr lsn = record->EndRecPtr;
2000 :
417 heikki.linnakangas 2001 GIC 28161 : Assert(XLogRecGetRmid(record) == RM_XLOG_ID);
2002 :
2003 28161 : if (info == XLOG_OVERWRITE_CONTRECORD)
2004 : {
417 heikki.linnakangas 2005 ECB : /* Verify the payload of a XLOG_OVERWRITE_CONTRECORD record. */
2006 : xl_overwrite_contrecord xlrec;
2007 :
417 heikki.linnakangas 2008 CBC 1 : memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_overwrite_contrecord));
417 heikki.linnakangas 2009 GIC 1 : if (xlrec.overwritten_lsn != record->overwrittenRecPtr)
417 heikki.linnakangas 2010 LBC 0 : elog(FATAL, "mismatching overwritten LSN %X/%X -> %X/%X",
2011 : LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
2012 : LSN_FORMAT_ARGS(record->overwrittenRecPtr));
2013 :
2014 : /* We have safely skipped the aborted record */
382 alvherre 2015 GIC 1 : abortedRecPtr = InvalidXLogRecPtr;
2016 1 : missingContrecPtr = InvalidXLogRecPtr;
382 alvherre 2017 ECB :
417 heikki.linnakangas 2018 GIC 1 : ereport(LOG,
417 heikki.linnakangas 2019 ECB : (errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s",
2020 : LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
2021 : timestamptz_to_str(xlrec.overwrite_time))));
2022 :
2023 : /* Verifying the record should only happen once */
417 heikki.linnakangas 2024 CBC 1 : record->overwrittenRecPtr = InvalidXLogRecPtr;
2025 : }
417 heikki.linnakangas 2026 GIC 28160 : else if (info == XLOG_BACKUP_END)
2027 : {
2028 : XLogRecPtr startpoint;
417 heikki.linnakangas 2029 ECB :
417 heikki.linnakangas 2030 CBC 61 : memcpy(&startpoint, XLogRecGetData(record), sizeof(startpoint));
417 heikki.linnakangas 2031 EUB :
417 heikki.linnakangas 2032 GIC 61 : if (backupStartPoint == startpoint)
2033 : {
2034 : /*
2035 : * We have reached the end of base backup, the point where
368 sfrost 2036 ECB : * pg_backup_stop() was done. The data on disk is now consistent
417 heikki.linnakangas 2037 : * (assuming we have also reached minRecoveryPoint). Set
2038 : * backupEndPoint to the current LSN, so that the next call to
2039 : * CheckRecoveryConsistency() will notice it and do the
2040 : * end-of-backup processing.
2041 : */
417 heikki.linnakangas 2042 GIC 50 : elog(DEBUG1, "end of backup record reached");
2043 :
2044 50 : backupEndPoint = lsn;
417 heikki.linnakangas 2045 ECB : }
2046 : else
417 heikki.linnakangas 2047 CBC 11 : elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%X, waiting for %X/%X",
2048 : LSN_FORMAT_ARGS(startpoint), LSN_FORMAT_ARGS(backupStartPoint));
2049 : }
417 heikki.linnakangas 2050 GIC 28161 : }
417 heikki.linnakangas 2051 ECB :
2052 : /*
255 alvherre 2053 : * Verify that, in non-test mode, ./pg_tblspc doesn't contain any real
2054 : * directories.
2055 : *
2056 : * Replay of database creation XLOG records for databases that were later
2057 : * dropped can create fake directories in pg_tblspc. By the time consistency
2058 : * is reached these directories should have been removed; here we verify
2059 : * that this did indeed happen. This is to be called at the point where
2060 : * consistent state is reached.
2061 : *
2062 : * allow_in_place_tablespaces turns the PANIC into a WARNING, which is
2063 : * useful for testing purposes, and also allows for an escape hatch in case
2064 : * things go south.
2065 : */
2066 : static void
255 alvherre 2067 GIC 75 : CheckTablespaceDirectory(void)
255 alvherre 2068 ECB : {
2069 : DIR *dir;
2070 : struct dirent *de;
2071 :
255 alvherre 2072 GIC 75 : dir = AllocateDir("pg_tblspc");
2073 227 : while ((de = ReadDir(dir, "pg_tblspc")) != NULL)
2074 : {
2075 : char path[MAXPGPATH + 10];
2076 :
2077 : /* Skip entries of non-oid names */
2078 152 : if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
2079 150 : continue;
2080 :
2081 2 : snprintf(path, sizeof(path), "pg_tblspc/%s", de->d_name);
2082 :
2083 2 : if (get_dirent_type(path, de, false, ERROR) != PGFILETYPE_LNK)
2084 1 : ereport(allow_in_place_tablespaces ? WARNING : PANIC,
2085 : (errcode(ERRCODE_DATA_CORRUPTED),
2086 : errmsg("unexpected directory entry \"%s\" found in %s",
2087 : de->d_name, "pg_tblspc/"),
255 alvherre 2088 ECB : errdetail("All directory entries in pg_tblspc/ should be symbolic links."),
2089 : errhint("Remove those directories, or set allow_in_place_tablespaces to ON transiently to let recovery complete.")));
2090 : }
255 alvherre 2091 GIC 75 : }
2092 :
417 heikki.linnakangas 2093 ECB : /*
2094 : * Checks if recovery has reached a consistent state. When consistency is
2095 : * reached and we have a valid starting standby snapshot, tell postmaster
2096 : * that it can start accepting read-only connections.
2097 : */
2098 : static void
417 heikki.linnakangas 2099 CBC 2504239 : CheckRecoveryConsistency(void)
417 heikki.linnakangas 2100 ECB : {
2101 : XLogRecPtr lastReplayedEndRecPtr;
2102 : TimeLineID lastReplayedTLI;
2103 :
2104 : /*
2105 : * During crash recovery, we don't reach a consistent state until we've
2106 : * replayed all the WAL.
2107 : */
417 heikki.linnakangas 2108 GIC 2504239 : if (XLogRecPtrIsInvalid(minRecoveryPoint))
2109 233325 : return;
2110 :
2111 2270914 : Assert(InArchiveRecovery);
417 heikki.linnakangas 2112 ECB :
2113 : /*
2114 : * assume that we are called in the startup process, and hence don't need
2115 : * a lock to read lastReplayedEndRecPtr
2116 : */
417 heikki.linnakangas 2117 GIC 2270914 : lastReplayedEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
2118 2270914 : lastReplayedTLI = XLogRecoveryCtl->lastReplayedTLI;
2119 :
417 heikki.linnakangas 2120 ECB : /*
2121 : * Have we reached the point where our base backup was completed?
2122 : */
417 heikki.linnakangas 2123 GIC 2270914 : if (!XLogRecPtrIsInvalid(backupEndPoint) &&
2124 71 : backupEndPoint <= lastReplayedEndRecPtr)
2125 : {
2126 51 : elog(DEBUG1, "end of backup reached");
2127 :
2128 : /*
417 heikki.linnakangas 2129 ECB : * We have reached the end of base backup, as indicated by pg_control.
2130 : * Update the control file accordingly.
2131 : */
417 heikki.linnakangas 2132 CBC 51 : ReachedEndOfBackup(lastReplayedEndRecPtr, lastReplayedTLI);
417 heikki.linnakangas 2133 GIC 51 : backupStartPoint = InvalidXLogRecPtr;
2134 51 : backupEndPoint = InvalidXLogRecPtr;
2135 51 : backupEndRequired = false;
2136 : }
2137 :
417 heikki.linnakangas 2138 ECB : /*
2139 : * Have we passed our safe starting point? Note that minRecoveryPoint is
2140 : * known to be incorrectly set if recovering from a backup, until the
2141 : * XLOG_BACKUP_END arrives to advise us of the correct minRecoveryPoint.
2142 : * All we know prior to that is that we're not consistent yet.
2143 : */
417 heikki.linnakangas 2144 CBC 2270914 : if (!reachedConsistency && !backupEndRequired &&
2145 7774 : minRecoveryPoint <= lastReplayedEndRecPtr)
2146 : {
417 heikki.linnakangas 2147 ECB : /*
2148 : * Check to see if the XLOG sequence contained any unresolved
2149 : * references to uninitialized pages.
2150 : */
417 heikki.linnakangas 2151 GIC 75 : XLogCheckInvalidPages();
2152 :
255 alvherre 2153 ECB : /*
2154 : * Check that pg_tblspc doesn't contain any real directories. Replay
2155 : * of Database/CREATE_* records may have created fictitious tablespace
2156 : * directories that should have been removed by the time consistency
2157 : * was reached.
2158 : */
255 alvherre 2159 GIC 75 : CheckTablespaceDirectory();
2160 :
417 heikki.linnakangas 2161 75 : reachedConsistency = true;
2162 75 : ereport(LOG,
2163 : (errmsg("consistent recovery state reached at %X/%X",
2164 : LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
417 heikki.linnakangas 2165 ECB : }
2166 :
2167 : /*
2168 : * Have we got a valid starting snapshot that will allow queries to be
2169 : * run? If so, we can tell postmaster that the database is consistent now,
2170 : * enabling connections.
2171 : */
417 heikki.linnakangas 2172 CBC 2270914 : if (standbyState == STANDBY_SNAPSHOT_READY &&
417 heikki.linnakangas 2173 GIC 2270711 : !LocalHotStandbyActive &&
2174 71 : reachedConsistency &&
2175 : IsUnderPostmaster)
2176 : {
2177 71 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
2178 71 : XLogRecoveryCtl->SharedHotStandbyActive = true;
2179 71 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 2180 ECB :
417 heikki.linnakangas 2181 GIC 71 : LocalHotStandbyActive = true;
417 heikki.linnakangas 2182 ECB :
417 heikki.linnakangas 2183 CBC 71 : SendPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY);
2184 : }
2185 : }
2186 :
2187 : /*
2188 : * Error context callback for errors occurring during rm_redo().
2189 : */
2190 : static void
417 heikki.linnakangas 2191 GIC 143 : rm_redo_error_callback(void *arg)
2192 : {
417 heikki.linnakangas 2193 CBC 143 : XLogReaderState *record = (XLogReaderState *) arg;
417 heikki.linnakangas 2194 ECB : StringInfoData buf;
2195 :
417 heikki.linnakangas 2196 GIC 143 : initStringInfo(&buf);
2197 143 : xlog_outdesc(&buf, record);
417 heikki.linnakangas 2198 CBC 143 : xlog_block_info(&buf, record);
417 heikki.linnakangas 2199 ECB :
2200 : /* translator: %s is a WAL record description */
417 heikki.linnakangas 2201 GIC 143 : errcontext("WAL redo at %X/%X for %s",
417 heikki.linnakangas 2202 CBC 143 : LSN_FORMAT_ARGS(record->ReadRecPtr),
2203 : buf.data);
417 heikki.linnakangas 2204 ECB :
417 heikki.linnakangas 2205 GIC 143 : pfree(buf.data);
2206 143 : }
2207 :
2208 : /*
2209 : * Returns a string describing an XLogRecord, consisting of its identity
2210 : * optionally followed by a colon, a space, and a further description.
2211 : */
417 heikki.linnakangas 2212 ECB : void
417 heikki.linnakangas 2213 GIC 143 : xlog_outdesc(StringInfo buf, XLogReaderState *record)
417 heikki.linnakangas 2214 ECB : {
368 jdavis 2215 GIC 143 : RmgrData rmgr = GetRmgr(XLogRecGetRmid(record));
417 heikki.linnakangas 2216 143 : uint8 info = XLogRecGetInfo(record);
417 heikki.linnakangas 2217 ECB : const char *id;
2218 :
368 jdavis 2219 CBC 143 : appendStringInfoString(buf, rmgr.rm_name);
417 heikki.linnakangas 2220 GIC 143 : appendStringInfoChar(buf, '/');
2221 :
368 jdavis 2222 CBC 143 : id = rmgr.rm_identify(info);
417 heikki.linnakangas 2223 143 : if (id == NULL)
417 heikki.linnakangas 2224 UIC 0 : appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
2225 : else
417 heikki.linnakangas 2226 CBC 143 : appendStringInfo(buf, "%s: ", id);
417 heikki.linnakangas 2227 ECB :
368 jdavis 2228 GIC 143 : rmgr.rm_desc(buf, record);
417 heikki.linnakangas 2229 143 : }
2230 :
#ifdef WAL_DEBUG

/*
 * Append generic header information about a record to 'buf': the previous
 * record pointer, the transaction id, the data length, and finally the
 * block references (via xlog_block_info).  Only built with WAL_DEBUG.
 */
static void
xlog_outrec(StringInfo buf, XLogReaderState *record)
{
	XLogRecPtr	prevRec = XLogRecGetPrev(record);

	appendStringInfo(buf, "prev %X/%X; xid %u",
					 LSN_FORMAT_ARGS(prevRec),
					 XLogRecGetXid(record));

	appendStringInfo(buf, "; len %u",
					 XLogRecGetDataLen(record));

	xlog_block_info(buf, record);
}
#endif							/* WAL_DEBUG */
2246 :
417 heikki.linnakangas 2247 ECB : /*
2248 : * Returns a string giving information about all the blocks in an
2249 : * XLogRecord.
2250 : */
2251 : static void
417 heikki.linnakangas 2252 GIC 143 : xlog_block_info(StringInfo buf, XLogReaderState *record)
2253 : {
2254 : int block_id;
2255 :
2256 : /* decode block references */
387 tmunro 2257 217 : for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
2258 : {
2259 : RelFileLocator rlocator;
2260 : ForkNumber forknum;
2261 : BlockNumber blk;
2262 :
363 tgl 2263 74 : if (!XLogRecGetBlockTagExtended(record, block_id,
2264 : &rlocator, &forknum, &blk, NULL))
417 heikki.linnakangas 2265 UIC 0 : continue;
2266 :
417 heikki.linnakangas 2267 GIC 74 : if (forknum != MAIN_FORKNUM)
193 rhaas 2268 11 : appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u",
2269 : block_id,
2270 : rlocator.spcOid, rlocator.dbOid,
2271 : rlocator.relNumber,
2272 : forknum,
2273 : blk);
417 heikki.linnakangas 2274 ECB : else
193 rhaas 2275 GIC 63 : appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u",
2276 : block_id,
2277 : rlocator.spcOid, rlocator.dbOid,
2278 : rlocator.relNumber,
2279 : blk);
417 heikki.linnakangas 2280 CBC 74 : if (XLogRecHasBlockImage(record, block_id))
417 heikki.linnakangas 2281 GIC 49 : appendStringInfoString(buf, " FPW");
2282 : }
2283 143 : }
2284 :
2285 :
417 heikki.linnakangas 2286 ECB : /*
2287 : * Check that it's OK to switch to new timeline during recovery.
417 heikki.linnakangas 2288 EUB : *
2289 : * 'lsn' is the address of the shutdown checkpoint record we're about to
417 heikki.linnakangas 2290 ECB : * replay. (Currently, timeline can only change at a shutdown checkpoint).
2291 : */
2292 : static void
417 heikki.linnakangas 2293 GIC 9 : checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI,
2294 : TimeLineID replayTLI)
2295 : {
2296 : /* Check that the record agrees on what the current (old) timeline is */
2297 9 : if (prevTLI != replayTLI)
417 heikki.linnakangas 2298 LBC 0 : ereport(PANIC,
2299 : (errmsg("unexpected previous timeline ID %u (current timeline ID %u) in checkpoint record",
2300 : prevTLI, replayTLI)));
2301 :
2302 : /*
417 heikki.linnakangas 2303 ECB : * The new timeline better be in the list of timelines we expect to see,
2304 : * according to the timeline history. It should also not decrease.
2305 : */
417 heikki.linnakangas 2306 CBC 9 : if (newTLI < replayTLI || !tliInHistory(newTLI, expectedTLEs))
417 heikki.linnakangas 2307 UIC 0 : ereport(PANIC,
2308 : (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
2309 : newTLI, replayTLI)));
2310 :
2311 : /*
2312 : * If we have not yet reached min recovery point, and we're about to
2313 : * switch to a timeline greater than the timeline of the min recovery
2314 : * point: trouble. After switching to the new timeline, we could not
2315 : * possibly visit the min recovery point on the correct timeline anymore.
417 heikki.linnakangas 2316 ECB : * This can happen if there is a newer timeline in the archive that
2317 : * branched before the timeline the min recovery point is on, and you
2318 : * attempt to do PITR to the new timeline.
2319 : */
417 heikki.linnakangas 2320 CBC 9 : if (!XLogRecPtrIsInvalid(minRecoveryPoint) &&
417 heikki.linnakangas 2321 GBC 8 : lsn < minRecoveryPoint &&
417 heikki.linnakangas 2322 GIC 1 : newTLI > minRecoveryPointTLI)
417 heikki.linnakangas 2323 UIC 0 : ereport(PANIC,
2324 : (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
2325 : newTLI,
2326 : LSN_FORMAT_ARGS(minRecoveryPoint),
2327 : minRecoveryPointTLI)));
2328 :
417 heikki.linnakangas 2329 ECB : /* Looks good */
417 heikki.linnakangas 2330 GBC 9 : }
2331 :
2332 :
2333 : /*
2334 : * Extract timestamp from WAL record.
2335 : *
2336 : * If the record contains a timestamp, returns true, and saves the timestamp
2337 : * in *recordXtime. If the record type has no timestamp, returns false.
2338 : * Currently, only transaction commit/abort records and restore points contain
2339 : * timestamps.
2340 : */
2341 : static bool
417 heikki.linnakangas 2342 GIC 36360 : getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
417 heikki.linnakangas 2343 ECB : {
417 heikki.linnakangas 2344 CBC 36360 : uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2345 36360 : uint8 xact_info = info & XLOG_XACT_OPMASK;
417 heikki.linnakangas 2346 GBC 36360 : uint8 rmid = XLogRecGetRmid(record);
2347 :
417 heikki.linnakangas 2348 GIC 36360 : if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
2349 : {
2350 2 : *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
2351 2 : return true;
2352 : }
417 heikki.linnakangas 2353 CBC 36358 : if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_COMMIT ||
2354 : xact_info == XLOG_XACT_COMMIT_PREPARED))
2355 : {
417 heikki.linnakangas 2356 GIC 33540 : *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
2357 33540 : return true;
2358 : }
2359 2818 : if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_ABORT ||
2360 : xact_info == XLOG_XACT_ABORT_PREPARED))
2361 : {
2362 2818 : *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
2363 2818 : return true;
2364 : }
417 heikki.linnakangas 2365 LBC 0 : return false;
2366 : }
417 heikki.linnakangas 2367 ECB :
2368 : /*
2369 : * Checks whether the current buffer page and backup page stored in the
2370 : * WAL record are consistent or not. Before comparing the two pages, a
2371 : * masking can be applied to the pages to ignore certain areas like hint bits,
2372 : * unused space between pd_lower and pd_upper among other things. This
2373 : * function should be called once WAL replay has been completed for a
2374 : * given record.
2375 : */
2376 : static void
417 heikki.linnakangas 2377 UIC 0 : verifyBackupPageConsistency(XLogReaderState *record)
2378 : {
368 jdavis 2379 LBC 0 : RmgrData rmgr = GetRmgr(XLogRecGetRmid(record));
2380 : RelFileLocator rlocator;
2381 : ForkNumber forknum;
417 heikki.linnakangas 2382 ECB : BlockNumber blkno;
2383 : int block_id;
2384 :
2385 : /* Records with no backup blocks have no need for consistency checks. */
417 heikki.linnakangas 2386 LBC 0 : if (!XLogRecHasAnyBlockRefs(record))
417 heikki.linnakangas 2387 UIC 0 : return;
417 heikki.linnakangas 2388 EUB :
417 heikki.linnakangas 2389 UIC 0 : Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
2390 :
387 tmunro 2391 0 : for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
2392 : {
2393 : Buffer buf;
2394 : Page page;
2395 :
363 tgl 2396 0 : if (!XLogRecGetBlockTagExtended(record, block_id,
2397 : &rlocator, &forknum, &blkno, NULL))
2398 : {
2399 : /*
417 heikki.linnakangas 2400 EUB : * WAL record doesn't contain a block reference with the given id.
2401 : * Do nothing.
2402 : */
417 heikki.linnakangas 2403 UIC 0 : continue;
2404 : }
2405 :
2406 0 : Assert(XLogRecHasBlockImage(record, block_id));
2407 :
2408 0 : if (XLogRecBlockImageApply(record, block_id))
417 heikki.linnakangas 2409 EUB : {
2410 : /*
2411 : * WAL record has already applied the page, so bypass the
2412 : * consistency check as that would result in comparing the full
2413 : * page stored in the record with itself.
2414 : */
417 heikki.linnakangas 2415 UIC 0 : continue;
2416 : }
2417 :
2418 : /*
417 heikki.linnakangas 2419 EUB : * Read the contents from the current buffer and store it in a
2420 : * temporary page.
2421 : */
277 rhaas 2422 UNC 0 : buf = XLogReadBufferExtended(rlocator, forknum, blkno,
2423 : RBM_NORMAL_NO_LOG,
2424 : InvalidBuffer);
417 heikki.linnakangas 2425 UIC 0 : if (!BufferIsValid(buf))
417 heikki.linnakangas 2426 UBC 0 : continue;
2427 :
417 heikki.linnakangas 2428 UIC 0 : LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
417 heikki.linnakangas 2429 UBC 0 : page = BufferGetPage(buf);
2430 :
417 heikki.linnakangas 2431 EUB : /*
2432 : * Take a copy of the local page where WAL has been applied to have a
2433 : * comparison base before masking it...
2434 : */
417 heikki.linnakangas 2435 UIC 0 : memcpy(replay_image_masked, page, BLCKSZ);
2436 :
2437 : /* No need for this page anymore now that a copy is in. */
417 heikki.linnakangas 2438 UBC 0 : UnlockReleaseBuffer(buf);
2439 :
2440 : /*
2441 : * If the block LSN is already ahead of this WAL record, we can't
2442 : * expect contents to match. This can happen if recovery is
2443 : * restarted.
2444 : */
2445 0 : if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
417 heikki.linnakangas 2446 UIC 0 : continue;
2447 :
417 heikki.linnakangas 2448 EUB : /*
2449 : * Read the contents from the backup copy, stored in WAL record and
2450 : * store it in a temporary page. There is no need to allocate a new
2451 : * page here, a local buffer is fine to hold its contents and a mask
2452 : * can be directly applied on it.
2453 : */
417 heikki.linnakangas 2454 UIC 0 : if (!RestoreBlockImage(record, block_id, primary_image_masked))
212 michael 2455 0 : ereport(ERROR,
2456 : (errcode(ERRCODE_INTERNAL_ERROR),
2457 : errmsg_internal("%s", record->errormsg_buf)));
417 heikki.linnakangas 2458 EUB :
2459 : /*
2460 : * If masking function is defined, mask both the primary and replay
2461 : * images
2462 : */
368 jdavis 2463 UIC 0 : if (rmgr.rm_mask != NULL)
2464 : {
2465 0 : rmgr.rm_mask(replay_image_masked, blkno);
2466 0 : rmgr.rm_mask(primary_image_masked, blkno);
2467 : }
417 heikki.linnakangas 2468 EUB :
2469 : /* Time to compare the primary and replay images. */
417 heikki.linnakangas 2470 UIC 0 : if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
2471 : {
2472 0 : elog(FATAL,
2473 : "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
2474 : rlocator.spcOid, rlocator.dbOid, rlocator.relNumber,
2475 : forknum, blkno);
2476 : }
417 heikki.linnakangas 2477 EUB : }
2478 : }
2479 :
2480 : /*
2481 : * For point-in-time recovery, this function decides whether we want to
2482 : * stop applying the XLOG before the current record.
2483 : *
2484 : * Returns true if we are stopping, false otherwise. If stopping, some
2485 : * information is saved in recoveryStopXid et al for use in annotating the
2486 : * new timeline's history file.
2487 : */
2488 : static bool
417 heikki.linnakangas 2489 GBC 2504088 : recoveryStopsBefore(XLogReaderState *record)
2490 : {
417 heikki.linnakangas 2491 GIC 2504088 : bool stopsHere = false;
2492 : uint8 xact_info;
417 heikki.linnakangas 2493 EUB : bool isCommit;
417 heikki.linnakangas 2494 GIC 2504088 : TimestampTz recordXtime = 0;
417 heikki.linnakangas 2495 EUB : TransactionId recordXid;
2496 :
2497 : /*
2498 : * Ignore recovery target settings when not in archive recovery (meaning
2499 : * we are in crash recovery).
2500 : */
417 heikki.linnakangas 2501 GIC 2504088 : if (!ArchiveRecoveryRequested)
2502 222595 : return false;
2503 :
2504 : /* Check if we should stop as soon as reaching consistency */
2505 2281493 : if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
2506 : {
417 heikki.linnakangas 2507 UIC 0 : ereport(LOG,
2508 : (errmsg("recovery stopping after reaching consistency")));
2509 :
2510 0 : recoveryStopAfter = false;
2511 0 : recoveryStopXid = InvalidTransactionId;
417 heikki.linnakangas 2512 LBC 0 : recoveryStopLSN = InvalidXLogRecPtr;
417 heikki.linnakangas 2513 UIC 0 : recoveryStopTime = 0;
417 heikki.linnakangas 2514 LBC 0 : recoveryStopName[0] = '\0';
417 heikki.linnakangas 2515 UIC 0 : return true;
2516 : }
417 heikki.linnakangas 2517 ECB :
2518 : /* Check if target LSN has been reached */
417 heikki.linnakangas 2519 GIC 2281493 : if (recoveryTarget == RECOVERY_TARGET_LSN &&
417 heikki.linnakangas 2520 UIC 0 : !recoveryTargetInclusive &&
2521 0 : record->ReadRecPtr >= recoveryTargetLSN)
2522 : {
2523 0 : recoveryStopAfter = false;
417 heikki.linnakangas 2524 LBC 0 : recoveryStopXid = InvalidTransactionId;
2525 0 : recoveryStopLSN = record->ReadRecPtr;
417 heikki.linnakangas 2526 UIC 0 : recoveryStopTime = 0;
2527 0 : recoveryStopName[0] = '\0';
417 heikki.linnakangas 2528 LBC 0 : ereport(LOG,
2529 : (errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
417 heikki.linnakangas 2530 EUB : LSN_FORMAT_ARGS(recoveryStopLSN))));
417 heikki.linnakangas 2531 UIC 0 : return true;
2532 : }
417 heikki.linnakangas 2533 EUB :
2534 : /* Otherwise we only consider stopping before COMMIT or ABORT records. */
417 heikki.linnakangas 2535 GBC 2281493 : if (XLogRecGetRmid(record) != RM_XACT_ID)
2536 2263117 : return false;
417 heikki.linnakangas 2537 EUB :
417 heikki.linnakangas 2538 GBC 18376 : xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
2539 :
417 heikki.linnakangas 2540 GIC 18376 : if (xact_info == XLOG_XACT_COMMIT)
2541 : {
417 heikki.linnakangas 2542 CBC 16751 : isCommit = true;
417 heikki.linnakangas 2543 GBC 16751 : recordXid = XLogRecGetXid(record);
417 heikki.linnakangas 2544 EUB : }
417 heikki.linnakangas 2545 GIC 1625 : else if (xact_info == XLOG_XACT_COMMIT_PREPARED)
417 heikki.linnakangas 2546 EUB : {
417 heikki.linnakangas 2547 GBC 19 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
417 heikki.linnakangas 2548 EUB : xl_xact_parsed_commit parsed;
2549 :
417 heikki.linnakangas 2550 GBC 19 : isCommit = true;
2551 19 : ParseCommitRecord(XLogRecGetInfo(record),
2552 : xlrec,
2553 : &parsed);
2554 19 : recordXid = parsed.twophase_xid;
2555 : }
417 heikki.linnakangas 2556 GIC 1606 : else if (xact_info == XLOG_XACT_ABORT)
2557 : {
417 heikki.linnakangas 2558 CBC 1400 : isCommit = false;
2559 1400 : recordXid = XLogRecGetXid(record);
2560 : }
2561 206 : else if (xact_info == XLOG_XACT_ABORT_PREPARED)
2562 : {
2563 9 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
2564 : xl_xact_parsed_abort parsed;
417 heikki.linnakangas 2565 ECB :
417 heikki.linnakangas 2566 CBC 9 : isCommit = false;
417 heikki.linnakangas 2567 GIC 9 : ParseAbortRecord(XLogRecGetInfo(record),
417 heikki.linnakangas 2568 ECB : xlrec,
2569 : &parsed);
417 heikki.linnakangas 2570 CBC 9 : recordXid = parsed.twophase_xid;
2571 : }
2572 : else
2573 197 : return false;
417 heikki.linnakangas 2574 ECB :
417 heikki.linnakangas 2575 GIC 18179 : if (recoveryTarget == RECOVERY_TARGET_XID && !recoveryTargetInclusive)
2576 : {
417 heikki.linnakangas 2577 ECB : /*
2578 : * There can be only one transaction end record with this exact
2579 : * transactionid
2580 : *
2581 : * when testing for an xid, we MUST test for equality only, since
2582 : * transactions are numbered in the order they start, not the order
2583 : * they complete. A higher numbered xid will complete before you about
2584 : * 50% of the time...
2585 : */
417 heikki.linnakangas 2586 LBC 0 : stopsHere = (recordXid == recoveryTargetXid);
2587 : }
2588 :
80 tgl 2589 ECB : /*
2590 : * Note: we must fetch recordXtime regardless of recoveryTarget setting.
2591 : * We don't expect getRecordTimestamp ever to fail, since we already know
2592 : * this is a commit or abort record; but test its result anyway.
2593 : */
80 tgl 2594 GIC 18179 : if (getRecordTimestamp(record, &recordXtime) &&
2595 18179 : recoveryTarget == RECOVERY_TARGET_TIME)
417 heikki.linnakangas 2596 ECB : {
2597 : /*
2598 : * There can be many transactions that share the same commit time, so
2599 : * we stop after the last one, if we are inclusive, or stop at the
2600 : * first one if we are exclusive
2601 : */
417 heikki.linnakangas 2602 UIC 0 : if (recoveryTargetInclusive)
2603 0 : stopsHere = (recordXtime > recoveryTargetTime);
2604 : else
2605 0 : stopsHere = (recordXtime >= recoveryTargetTime);
2606 : }
2607 :
417 heikki.linnakangas 2608 GIC 18179 : if (stopsHere)
417 heikki.linnakangas 2609 EUB : {
417 heikki.linnakangas 2610 UIC 0 : recoveryStopAfter = false;
2611 0 : recoveryStopXid = recordXid;
2612 0 : recoveryStopTime = recordXtime;
2613 0 : recoveryStopLSN = InvalidXLogRecPtr;
2614 0 : recoveryStopName[0] = '\0';
2615 :
2616 0 : if (isCommit)
417 heikki.linnakangas 2617 ECB : {
417 heikki.linnakangas 2618 LBC 0 : ereport(LOG,
2619 : (errmsg("recovery stopping before commit of transaction %u, time %s",
2620 : recoveryStopXid,
2621 : timestamptz_to_str(recoveryStopTime))));
2622 : }
2623 : else
2624 : {
417 heikki.linnakangas 2625 UBC 0 : ereport(LOG,
417 heikki.linnakangas 2626 EUB : (errmsg("recovery stopping before abort of transaction %u, time %s",
2627 : recoveryStopXid,
2628 : timestamptz_to_str(recoveryStopTime))));
2629 : }
2630 : }
417 heikki.linnakangas 2631 ECB :
417 heikki.linnakangas 2632 GIC 18179 : return stopsHere;
417 heikki.linnakangas 2633 EUB : }
2634 :
2635 : /*
2636 : * Same as recoveryStopsBefore, but called after applying the record.
2637 : *
2638 : * We also track the timestamp of the latest applied COMMIT/ABORT
2639 : * record in XLogRecoveryCtl->recoveryLastXTime.
2640 : */
2641 : static bool
417 heikki.linnakangas 2642 GIC 2504086 : recoveryStopsAfter(XLogReaderState *record)
2643 : {
2644 : uint8 info;
2645 : uint8 xact_info;
2646 : uint8 rmid;
2647 : TimestampTz recordXtime;
417 heikki.linnakangas 2648 EUB :
2649 : /*
2650 : * Ignore recovery target settings when not in archive recovery (meaning
2651 : * we are in crash recovery).
2652 : */
417 heikki.linnakangas 2653 GIC 2504086 : if (!ArchiveRecoveryRequested)
2654 222595 : return false;
417 heikki.linnakangas 2655 ECB :
417 heikki.linnakangas 2656 GIC 2281491 : info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
2657 2281491 : rmid = XLogRecGetRmid(record);
2658 :
2659 : /*
2660 : * There can be many restore points that share the same name; we stop at
2661 : * the first one.
2662 : */
2663 2281491 : if (recoveryTarget == RECOVERY_TARGET_NAME &&
2664 19 : rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
417 heikki.linnakangas 2665 ECB : {
2666 : xl_restore_point *recordRestorePointData;
2667 :
417 heikki.linnakangas 2668 GIC 3 : recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
2669 :
2670 3 : if (strcmp(recordRestorePointData->rp_name, recoveryTargetName) == 0)
2671 : {
2672 2 : recoveryStopAfter = true;
2673 2 : recoveryStopXid = InvalidTransactionId;
2674 2 : recoveryStopLSN = InvalidXLogRecPtr;
2675 2 : (void) getRecordTimestamp(record, &recoveryStopTime);
417 heikki.linnakangas 2676 CBC 2 : strlcpy(recoveryStopName, recordRestorePointData->rp_name, MAXFNAMELEN);
417 heikki.linnakangas 2677 ECB :
417 heikki.linnakangas 2678 GIC 2 : ereport(LOG,
417 heikki.linnakangas 2679 ECB : (errmsg("recovery stopping at restore point \"%s\", time %s",
2680 : recoveryStopName,
2681 : timestamptz_to_str(recoveryStopTime))));
417 heikki.linnakangas 2682 GIC 2 : return true;
2683 : }
2684 : }
2685 :
417 heikki.linnakangas 2686 ECB : /* Check if the target LSN has been reached */
417 heikki.linnakangas 2687 CBC 2281489 : if (recoveryTarget == RECOVERY_TARGET_LSN &&
417 heikki.linnakangas 2688 UIC 0 : recoveryTargetInclusive &&
2689 0 : record->ReadRecPtr >= recoveryTargetLSN)
2690 : {
417 heikki.linnakangas 2691 LBC 0 : recoveryStopAfter = true;
417 heikki.linnakangas 2692 UIC 0 : recoveryStopXid = InvalidTransactionId;
417 heikki.linnakangas 2693 LBC 0 : recoveryStopLSN = record->ReadRecPtr;
417 heikki.linnakangas 2694 UIC 0 : recoveryStopTime = 0;
417 heikki.linnakangas 2695 LBC 0 : recoveryStopName[0] = '\0';
2696 0 : ereport(LOG,
417 heikki.linnakangas 2697 ECB : (errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
2698 : LSN_FORMAT_ARGS(recoveryStopLSN))));
417 heikki.linnakangas 2699 LBC 0 : return true;
2700 : }
417 heikki.linnakangas 2701 ECB :
417 heikki.linnakangas 2702 GIC 2281489 : if (rmid != RM_XACT_ID)
2703 2263113 : return false;
2704 :
417 heikki.linnakangas 2705 CBC 18376 : xact_info = info & XLOG_XACT_OPMASK;
2706 :
417 heikki.linnakangas 2707 GIC 18376 : if (xact_info == XLOG_XACT_COMMIT ||
2708 1606 : xact_info == XLOG_XACT_COMMIT_PREPARED ||
2709 206 : xact_info == XLOG_XACT_ABORT ||
417 heikki.linnakangas 2710 ECB : xact_info == XLOG_XACT_ABORT_PREPARED)
417 heikki.linnakangas 2711 EUB : {
2712 : TransactionId recordXid;
2713 :
2714 : /* Update the last applied transaction timestamp */
417 heikki.linnakangas 2715 GBC 18179 : if (getRecordTimestamp(record, &recordXtime))
2716 18179 : SetLatestXTime(recordXtime);
417 heikki.linnakangas 2717 EUB :
2718 : /* Extract the XID of the committed/aborted transaction */
417 heikki.linnakangas 2719 GBC 18179 : if (xact_info == XLOG_XACT_COMMIT_PREPARED)
2720 : {
417 heikki.linnakangas 2721 GIC 19 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
417 heikki.linnakangas 2722 EUB : xl_xact_parsed_commit parsed;
2723 :
417 heikki.linnakangas 2724 GIC 19 : ParseCommitRecord(XLogRecGetInfo(record),
417 heikki.linnakangas 2725 ECB : xlrec,
2726 : &parsed);
417 heikki.linnakangas 2727 GIC 19 : recordXid = parsed.twophase_xid;
417 heikki.linnakangas 2728 ECB : }
417 heikki.linnakangas 2729 GIC 18160 : else if (xact_info == XLOG_XACT_ABORT_PREPARED)
417 heikki.linnakangas 2730 ECB : {
417 heikki.linnakangas 2731 CBC 9 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
417 heikki.linnakangas 2732 ECB : xl_xact_parsed_abort parsed;
2733 :
417 heikki.linnakangas 2734 GIC 9 : ParseAbortRecord(XLogRecGetInfo(record),
2735 : xlrec,
2736 : &parsed);
2737 9 : recordXid = parsed.twophase_xid;
417 heikki.linnakangas 2738 ECB : }
2739 : else
417 heikki.linnakangas 2740 GIC 18151 : recordXid = XLogRecGetXid(record);
2741 :
417 heikki.linnakangas 2742 ECB : /*
2743 : * There can be only one transaction end record with this exact
2744 : * transactionid
2745 : *
2746 : * when testing for an xid, we MUST test for equality only, since
2747 : * transactions are numbered in the order they start, not the order
2748 : * they complete. A higher numbered xid will complete before you about
2749 : * 50% of the time...
2750 : */
417 heikki.linnakangas 2751 GIC 18179 : if (recoveryTarget == RECOVERY_TARGET_XID && recoveryTargetInclusive &&
417 heikki.linnakangas 2752 LBC 0 : recordXid == recoveryTargetXid)
2753 : {
2754 0 : recoveryStopAfter = true;
417 heikki.linnakangas 2755 UIC 0 : recoveryStopXid = recordXid;
2756 0 : recoveryStopTime = recordXtime;
417 heikki.linnakangas 2757 LBC 0 : recoveryStopLSN = InvalidXLogRecPtr;
417 heikki.linnakangas 2758 UIC 0 : recoveryStopName[0] = '\0';
2759 :
417 heikki.linnakangas 2760 LBC 0 : if (xact_info == XLOG_XACT_COMMIT ||
2761 : xact_info == XLOG_XACT_COMMIT_PREPARED)
2762 : {
2763 0 : ereport(LOG,
2764 : (errmsg("recovery stopping after commit of transaction %u, time %s",
2765 : recoveryStopXid,
2766 : timestamptz_to_str(recoveryStopTime))));
2767 : }
417 heikki.linnakangas 2768 UIC 0 : else if (xact_info == XLOG_XACT_ABORT ||
2769 : xact_info == XLOG_XACT_ABORT_PREPARED)
2770 : {
2771 0 : ereport(LOG,
2772 : (errmsg("recovery stopping after abort of transaction %u, time %s",
2773 : recoveryStopXid,
417 heikki.linnakangas 2774 ECB : timestamptz_to_str(recoveryStopTime))));
417 heikki.linnakangas 2775 EUB : }
417 heikki.linnakangas 2776 UIC 0 : return true;
417 heikki.linnakangas 2777 EUB : }
2778 : }
2779 :
2780 : /* Check if we should stop as soon as reaching consistency */
417 heikki.linnakangas 2781 GBC 18376 : if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
2782 : {
417 heikki.linnakangas 2783 UBC 0 : ereport(LOG,
2784 : (errmsg("recovery stopping after reaching consistency")));
2785 :
2786 0 : recoveryStopAfter = true;
417 heikki.linnakangas 2787 UIC 0 : recoveryStopXid = InvalidTransactionId;
2788 0 : recoveryStopTime = 0;
2789 0 : recoveryStopLSN = InvalidXLogRecPtr;
2790 0 : recoveryStopName[0] = '\0';
417 heikki.linnakangas 2791 UBC 0 : return true;
2792 : }
2793 :
417 heikki.linnakangas 2794 GBC 18376 : return false;
2795 : }
2796 :
2797 : /*
2798 : * Create a comment for the history file to explain why and where
417 heikki.linnakangas 2799 EUB : * timeline changed.
2800 : */
2801 : static char *
417 heikki.linnakangas 2802 GIC 1142 : getRecoveryStopReason(void)
2803 : {
417 heikki.linnakangas 2804 ECB : char reason[200];
2805 :
417 heikki.linnakangas 2806 GBC 1142 : if (recoveryTarget == RECOVERY_TARGET_XID)
417 heikki.linnakangas 2807 UIC 0 : snprintf(reason, sizeof(reason),
2808 : "%s transaction %u",
417 heikki.linnakangas 2809 UBC 0 : recoveryStopAfter ? "after" : "before",
417 heikki.linnakangas 2810 EUB : recoveryStopXid);
417 heikki.linnakangas 2811 GBC 1142 : else if (recoveryTarget == RECOVERY_TARGET_TIME)
417 heikki.linnakangas 2812 UBC 0 : snprintf(reason, sizeof(reason),
417 heikki.linnakangas 2813 EUB : "%s %s\n",
417 heikki.linnakangas 2814 UBC 0 : recoveryStopAfter ? "after" : "before",
2815 : timestamptz_to_str(recoveryStopTime));
417 heikki.linnakangas 2816 GIC 1142 : else if (recoveryTarget == RECOVERY_TARGET_LSN)
417 heikki.linnakangas 2817 LBC 0 : snprintf(reason, sizeof(reason),
2818 : "%s LSN %X/%X\n",
417 heikki.linnakangas 2819 UIC 0 : recoveryStopAfter ? "after" : "before",
2820 0 : LSN_FORMAT_ARGS(recoveryStopLSN));
417 heikki.linnakangas 2821 GIC 1142 : else if (recoveryTarget == RECOVERY_TARGET_NAME)
2822 3 : snprintf(reason, sizeof(reason),
2823 : "at restore point \"%s\"",
2824 : recoveryStopName);
417 heikki.linnakangas 2825 CBC 1139 : else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
417 heikki.linnakangas 2826 UIC 0 : snprintf(reason, sizeof(reason), "reached consistency");
2827 : else
417 heikki.linnakangas 2828 GIC 1139 : snprintf(reason, sizeof(reason), "no recovery target specified");
417 heikki.linnakangas 2829 ECB :
417 heikki.linnakangas 2830 GBC 1142 : return pstrdup(reason);
2831 : }
417 heikki.linnakangas 2832 EUB :
2833 : /*
417 heikki.linnakangas 2834 ECB : * Wait until shared recoveryPauseState is set to RECOVERY_NOT_PAUSED.
417 heikki.linnakangas 2835 EUB : *
2836 : * endOfRecovery is true if the recovery target is reached and
2837 : * the paused state starts at the end of recovery because of
2838 : * recovery_target_action=pause, and false otherwise.
417 heikki.linnakangas 2839 ECB : */
417 heikki.linnakangas 2840 EUB : static void
417 heikki.linnakangas 2841 GIC 2 : recoveryPausesHere(bool endOfRecovery)
417 heikki.linnakangas 2842 EUB : {
2843 : /* Don't pause unless users can connect! */
417 heikki.linnakangas 2844 CBC 2 : if (!LocalHotStandbyActive)
417 heikki.linnakangas 2845 LBC 0 : return;
2846 :
2847 : /* Don't pause after standby promotion has been triggered */
417 heikki.linnakangas 2848 CBC 2 : if (LocalPromoteIsTriggered)
417 heikki.linnakangas 2849 UBC 0 : return;
2850 :
417 heikki.linnakangas 2851 CBC 2 : if (endOfRecovery)
417 heikki.linnakangas 2852 UIC 0 : ereport(LOG,
417 heikki.linnakangas 2853 ECB : (errmsg("pausing at the end of recovery"),
2854 : errhint("Execute pg_wal_replay_resume() to promote.")));
2855 : else
417 heikki.linnakangas 2856 GIC 2 : ereport(LOG,
2857 : (errmsg("recovery has paused"),
2858 : errhint("Execute pg_wal_replay_resume() to continue.")));
2859 :
2860 : /* loop until recoveryPauseState is set to RECOVERY_NOT_PAUSED */
2861 7 : while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
2862 : {
2863 6 : HandleStartupProcInterrupts();
417 heikki.linnakangas 2864 CBC 6 : if (CheckForStandbyTrigger())
417 heikki.linnakangas 2865 GIC 1 : return;
2866 :
417 heikki.linnakangas 2867 ECB : /*
417 heikki.linnakangas 2868 EUB : * If recovery pause is requested then set it paused. While we are in
2869 : * the loop, user might resume and pause again so set this every time.
2870 : */
417 heikki.linnakangas 2871 CBC 5 : ConfirmRecoveryPaused();
417 heikki.linnakangas 2872 EUB :
2873 : /*
417 heikki.linnakangas 2874 ECB : * We wait on a condition variable that will wake us as soon as the
417 heikki.linnakangas 2875 EUB : * pause ends, but we use a timeout so we can check the above exit
2876 : * condition periodically too.
2877 : */
417 heikki.linnakangas 2878 GIC 5 : ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
417 heikki.linnakangas 2879 ECB : WAIT_EVENT_RECOVERY_PAUSE);
2880 : }
417 heikki.linnakangas 2881 GIC 1 : ConditionVariableCancelSleep();
2882 : }
2883 :
417 heikki.linnakangas 2884 ECB : /*
2885 : * When recovery_min_apply_delay is set, we wait long enough to make sure
2886 : * certain record types are applied at least that interval behind the primary.
2887 : *
2888 : * Returns true if we waited.
2889 : *
2890 : * Note that the delay is calculated between the WAL record log time and
2891 : * the current time on standby. We would prefer to keep track of when this
2892 : * standby received each WAL record, which would allow a more consistent
2893 : * approach and one not affected by time synchronisation issues, but that
2894 : * is significantly more effort and complexity for little actual gain in
2895 : * usability.
2896 : */
2897 : static bool
417 heikki.linnakangas 2898 GIC 2504088 : recoveryApplyDelay(XLogReaderState *record)
2899 : {
2900 : uint8 xact_info;
417 heikki.linnakangas 2901 ECB : TimestampTz xtime;
2902 : TimestampTz delayUntil;
2903 : long msecs;
2904 :
2905 : /* nothing to do if no delay configured */
417 heikki.linnakangas 2906 GIC 2504088 : if (recovery_min_apply_delay <= 0)
2907 2504088 : return false;
2908 :
2909 : /* no delay is applied on a database not yet consistent */
417 heikki.linnakangas 2910 UIC 0 : if (!reachedConsistency)
2911 0 : return false;
2912 :
2913 : /* nothing to do if crash recovery is requested */
2914 0 : if (!ArchiveRecoveryRequested)
2915 0 : return false;
2916 :
2917 : /*
2918 : * Is it a COMMIT record?
2919 : *
2920 : * We deliberately choose not to delay aborts since they have no effect on
417 heikki.linnakangas 2921 ECB : * MVCC. We already allow replay of records that don't have a timestamp,
2922 : * so there is already opportunity for issues caused by early conflicts on
2923 : * standbys.
2924 : */
417 heikki.linnakangas 2925 UIC 0 : if (XLogRecGetRmid(record) != RM_XACT_ID)
2926 0 : return false;
2927 :
2928 0 : xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
417 heikki.linnakangas 2929 ECB :
417 heikki.linnakangas 2930 LBC 0 : if (xact_info != XLOG_XACT_COMMIT &&
2931 : xact_info != XLOG_XACT_COMMIT_PREPARED)
417 heikki.linnakangas 2932 UIC 0 : return false;
417 heikki.linnakangas 2933 EUB :
417 heikki.linnakangas 2934 UBC 0 : if (!getRecordTimestamp(record, &xtime))
417 heikki.linnakangas 2935 UIC 0 : return false;
2936 :
417 heikki.linnakangas 2937 UBC 0 : delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
417 heikki.linnakangas 2938 EUB :
2939 : /*
2940 : * Exit without arming the latch if it's already past time to apply this
2941 : * record
2942 : */
417 heikki.linnakangas 2943 UIC 0 : msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(), delayUntil);
2944 0 : if (msecs <= 0)
2945 0 : return false;
2946 :
2947 : while (true)
417 heikki.linnakangas 2948 EUB : {
417 heikki.linnakangas 2949 UBC 0 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
2950 :
2951 : /* This might change recovery_min_apply_delay. */
2952 0 : HandleStartupProcInterrupts();
2953 :
2954 0 : if (CheckForStandbyTrigger())
2955 0 : break;
2956 :
417 heikki.linnakangas 2957 EUB : /*
2958 : * Recalculate delayUntil as recovery_min_apply_delay could have
2959 : * changed while waiting in this loop.
2960 : */
417 heikki.linnakangas 2961 UIC 0 : delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
2962 :
417 heikki.linnakangas 2963 EUB : /*
2964 : * Wait for difference between GetCurrentTimestamp() and delayUntil.
2965 : */
417 heikki.linnakangas 2966 UIC 0 : msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
2967 : delayUntil);
2968 :
417 heikki.linnakangas 2969 UBC 0 : if (msecs <= 0)
417 heikki.linnakangas 2970 UIC 0 : break;
2971 :
417 heikki.linnakangas 2972 UBC 0 : elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
2973 :
2974 0 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
417 heikki.linnakangas 2975 EUB : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
2976 : msecs,
2977 : WAIT_EVENT_RECOVERY_APPLY_DELAY);
2978 : }
417 heikki.linnakangas 2979 UIC 0 : return true;
2980 : }
417 heikki.linnakangas 2981 EUB :
2982 : /*
2983 : * Get the current state of the recovery pause request.
2984 : */
2985 : RecoveryPauseState
417 heikki.linnakangas 2986 GBC 11 : GetRecoveryPauseState(void)
2987 : {
2988 : RecoveryPauseState state;
417 heikki.linnakangas 2989 EUB :
417 heikki.linnakangas 2990 GBC 11 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 2991 GIC 11 : state = XLogRecoveryCtl->recoveryPauseState;
417 heikki.linnakangas 2992 GBC 11 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
2993 :
2994 11 : return state;
2995 : }
2996 :
2997 : /*
2998 : * Set the recovery pause state.
417 heikki.linnakangas 2999 EUB : *
3000 : * If recovery pause is requested then sets the recovery pause state to
3001 : * 'pause requested' if it is not already 'paused'. Otherwise, sets it
3002 : * to 'not paused' to resume the recovery. The recovery pause will be
3003 : * confirmed by the ConfirmRecoveryPaused.
3004 : */
3005 : void
417 heikki.linnakangas 3006 CBC 39 : SetRecoveryPause(bool recoveryPause)
3007 : {
417 heikki.linnakangas 3008 GIC 39 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
3009 :
417 heikki.linnakangas 3010 CBC 39 : if (!recoveryPause)
3011 37 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
3012 2 : else if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_NOT_PAUSED)
417 heikki.linnakangas 3013 GIC 2 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSE_REQUESTED;
417 heikki.linnakangas 3014 ECB :
417 heikki.linnakangas 3015 GIC 39 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3016 :
3017 39 : if (!recoveryPause)
3018 37 : ConditionVariableBroadcast(&XLogRecoveryCtl->recoveryNotPausedCV);
3019 39 : }
3020 :
3021 : /*
3022 : * Confirm the recovery pause by setting the recovery pause state to
3023 : * RECOVERY_PAUSED.
3024 : */
3025 : static void
417 heikki.linnakangas 3026 CBC 5 : ConfirmRecoveryPaused(void)
3027 : {
417 heikki.linnakangas 3028 ECB : /* If recovery pause is requested then set it paused */
417 heikki.linnakangas 3029 GIC 5 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 3030 CBC 5 : if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED)
3031 2 : XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSED;
3032 5 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
3033 5 : }
3034 :
417 heikki.linnakangas 3035 ECB :
3036 : /*
3037 : * Attempt to read the next XLOG record.
3038 : *
3039 : * Before first call, the reader needs to be positioned to the first record
3040 : * by calling XLogPrefetcherBeginRead().
3041 : *
3042 : * If no valid record is available, returns NULL, or fails if emode is PANIC.
3043 : * (emode must be either PANIC, LOG). In standby mode, retries until a valid
3044 : * record is available.
3045 : */
3046 : static XLogRecord *
367 tmunro 3047 GIC 2506604 : ReadRecord(XLogPrefetcher *xlogprefetcher, int emode,
3048 : bool fetching_ckpt, TimeLineID replayTLI)
417 heikki.linnakangas 3049 ECB : {
3050 : XLogRecord *record;
367 tmunro 3051 CBC 2506604 : XLogReaderState *xlogreader = XLogPrefetcherGetReader(xlogprefetcher);
417 heikki.linnakangas 3052 2506604 : XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
417 heikki.linnakangas 3053 ECB :
3054 : /* Pass through parameters to XLogPageRead */
417 heikki.linnakangas 3055 GIC 2506604 : private->fetching_ckpt = fetching_ckpt;
3056 2506604 : private->emode = emode;
3057 2506604 : private->randAccess = (xlogreader->ReadRecPtr == InvalidXLogRecPtr);
3058 2506604 : private->replayTLI = replayTLI;
3059 :
3060 : /* This is the first attempt to read this page. */
3061 2506604 : lastSourceFailed = false;
3062 :
3063 : for (;;)
3064 79 : {
3065 : char *errormsg;
3066 :
367 tmunro 3067 CBC 2506683 : record = XLogPrefetcherReadRecord(xlogprefetcher, &errormsg);
417 heikki.linnakangas 3068 GIC 2506652 : if (record == NULL)
3069 : {
3070 : /*
223 rhaas 3071 ECB : * When we find that WAL ends in an incomplete record, keep track
3072 : * of that record. After recovery is done, we'll write a record
3073 : * to indicate to downstream WAL readers that that portion is to
3074 : * be ignored.
3075 : *
3076 : * However, when ArchiveRecoveryRequested = true, we're going to
3077 : * switch to a new timeline at the end of recovery. We will only
3078 : * copy WAL over to the new timeline up to the end of the last
3079 : * complete record, so if we did this, we would later create an
3080 : * overwrite contrecord in the wrong place, breaking everything.
417 heikki.linnakangas 3081 : */
223 rhaas 3082 GIC 195 : if (!ArchiveRecoveryRequested &&
417 heikki.linnakangas 3083 78 : !XLogRecPtrIsInvalid(xlogreader->abortedRecPtr))
417 heikki.linnakangas 3084 ECB : {
417 heikki.linnakangas 3085 GIC 1 : abortedRecPtr = xlogreader->abortedRecPtr;
3086 1 : missingContrecPtr = xlogreader->missingContrecPtr;
417 heikki.linnakangas 3087 ECB : }
3088 :
417 heikki.linnakangas 3089 GIC 195 : if (readFile >= 0)
3090 : {
3091 179 : close(readFile);
3092 179 : readFile = -1;
3093 : }
3094 :
3095 : /*
3096 : * We only end up here without a message when XLogPageRead()
3097 : * failed - in that case we already logged something. In
3098 : * StandbyMode that only happens if we have been triggered, so we
3099 : * shouldn't loop anymore in that case.
3100 : */
3101 195 : if (errormsg)
417 heikki.linnakangas 3102 CBC 181 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
417 heikki.linnakangas 3103 ECB : (errmsg_internal("%s", errormsg) /* already translated */ ));
3104 : }
3105 :
3106 : /*
3107 : * Check page TLI is one of the expected values.
3108 : */
417 heikki.linnakangas 3109 CBC 2506457 : else if (!tliInHistory(xlogreader->latestPageTLI, expectedTLEs))
3110 : {
417 heikki.linnakangas 3111 ECB : char fname[MAXFNAMELEN];
3112 : XLogSegNo segno;
3113 : int32 offset;
3114 :
417 heikki.linnakangas 3115 UIC 0 : XLByteToSeg(xlogreader->latestPagePtr, segno, wal_segment_size);
3116 0 : offset = XLogSegmentOffset(xlogreader->latestPagePtr,
3117 : wal_segment_size);
3118 0 : XLogFileName(fname, xlogreader->seg.ws_tli, segno,
3119 : wal_segment_size);
3120 0 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3121 : (errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%X, offset %u",
417 heikki.linnakangas 3122 ECB : xlogreader->latestPageTLI,
3123 : fname,
3124 : LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
3125 : offset)));
417 heikki.linnakangas 3126 UIC 0 : record = NULL;
3127 : }
3128 :
417 heikki.linnakangas 3129 GIC 2506652 : if (record)
417 heikki.linnakangas 3130 ECB : {
3131 : /* Great, got a record */
417 heikki.linnakangas 3132 GIC 2506573 : return record;
3133 : }
3134 : else
3135 : {
417 heikki.linnakangas 3136 EUB : /* No valid record available from this source */
417 heikki.linnakangas 3137 GBC 195 : lastSourceFailed = true;
3138 :
417 heikki.linnakangas 3139 EUB : /*
3140 : * If archive recovery was requested, but we were still doing
3141 : * crash recovery, switch to archive recovery and retry using the
3142 : * offline archive. We have now replayed all the valid WAL in
3143 : * pg_wal, so we are presumably now consistent.
3144 : *
3145 : * We require that there's at least some valid WAL present in
3146 : * pg_wal, however (!fetching_ckpt). We could recover using the
3147 : * WAL from the archive, even if pg_wal is completely empty, but
3148 : * we'd have no idea how far we'd have to replay to reach
3149 : * consistency. So err on the safe side and give up.
417 heikki.linnakangas 3150 ECB : */
417 heikki.linnakangas 3151 GIC 195 : if (!InArchiveRecovery && ArchiveRecoveryRequested &&
3152 2 : !fetching_ckpt)
417 heikki.linnakangas 3153 ECB : {
417 heikki.linnakangas 3154 GIC 2 : ereport(DEBUG1,
3155 : (errmsg_internal("reached end of WAL in pg_wal, entering archive recovery")));
3156 2 : InArchiveRecovery = true;
3157 2 : if (StandbyModeRequested)
62 rhaas 3158 CBC 2 : EnableStandbyMode();
3159 :
417 heikki.linnakangas 3160 GIC 2 : SwitchIntoArchiveRecovery(xlogreader->EndRecPtr, replayTLI);
3161 2 : minRecoveryPoint = xlogreader->EndRecPtr;
3162 2 : minRecoveryPointTLI = replayTLI;
3163 :
3164 2 : CheckRecoveryConsistency();
3165 :
3166 : /*
3167 : * Before we retry, reset lastSourceFailed and currentSource
3168 : * so that we will check the archive next.
3169 : */
3170 2 : lastSourceFailed = false;
3171 2 : currentSource = XLOG_FROM_ANY;
417 heikki.linnakangas 3172 ECB :
417 heikki.linnakangas 3173 CBC 79 : continue;
3174 : }
417 heikki.linnakangas 3175 ECB :
3176 : /* In standby mode, loop back to retry. Otherwise, give up. */
417 heikki.linnakangas 3177 CBC 193 : if (StandbyMode && !CheckForStandbyTrigger())
3178 77 : continue;
417 heikki.linnakangas 3179 ECB : else
417 heikki.linnakangas 3180 GIC 116 : return NULL;
417 heikki.linnakangas 3181 ECB : }
3182 : }
3183 : }
3184 :
3185 : /*
3186 : * Read the XLOG page containing targetPagePtr into readBuf (if not read
3187 : * already). Returns number of bytes read, if the page is read successfully,
3188 : * or XLREAD_FAIL in case of errors. When errors occur, they are ereport'ed,
3189 : * but only if they have not been previously reported.
3190 : *
3191 : * See XLogReaderRoutine.page_read for more details.
3192 : *
367 tmunro 3193 : * While prefetching, xlogreader->nonblocking may be set. In that case,
3194 : * returns XLREAD_WOULDBLOCK if we'd otherwise have to wait for more WAL.
3195 : *
417 heikki.linnakangas 3196 : * This is responsible for restoring files from archive as needed, as well
3197 : * as for waiting for the requested WAL record to arrive in standby mode.
3198 : *
3199 : * xlogreader->private_data->emode specifies the log level used for reporting
3200 : * "file not found" or "end of WAL" situations in archive recovery, or in
3201 : * standby mode when promotion is triggered. If set to WARNING or below,
3202 : * XLogPageRead() returns XLREAD_FAIL in those situations, on higher log
3203 : * levels the ereport() won't return.
3204 : *
3205 : * In standby mode, if after a successful return of XLogPageRead() the
3206 : * caller finds the record it's interested in to be broken, it should
3207 : * ereport the error with the level determined by
3208 : * emode_for_corrupt_record(), and then set lastSourceFailed
3209 : * and call XLogPageRead() again with the same arguments. This lets
3210 : * XLogPageRead() to try fetching the record from another source, or to
3211 : * sleep and retry.
3212 : */
3213 : static int
417 heikki.linnakangas 3214 GIC 82351 : XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
3215 : XLogRecPtr targetRecPtr, char *readBuf)
3216 : {
3217 82351 : XLogPageReadPrivate *private =
3218 : (XLogPageReadPrivate *) xlogreader->private_data;
3219 82351 : int emode = private->emode;
3220 : uint32 targetPageOff;
3221 : XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY;
3222 : int r;
3223 :
3224 82351 : XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
3225 82351 : targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
3226 :
3227 : /*
3228 : * See if we need to switch to a new segment because the requested record
3229 : * is not in the currently open one.
3230 : */
3231 82351 : if (readFile >= 0 &&
3232 80883 : !XLByteInSeg(targetPagePtr, readSegNo, wal_segment_size))
3233 : {
3234 : /*
3235 : * Request a restartpoint if we've replayed too much xlog since the
3236 : * last one.
417 heikki.linnakangas 3237 ECB : */
417 heikki.linnakangas 3238 GIC 126 : if (ArchiveRecoveryRequested && IsUnderPostmaster)
3239 : {
417 heikki.linnakangas 3240 CBC 118 : if (XLogCheckpointNeeded(readSegNo))
3241 : {
3242 40 : (void) GetRedoRecPtr();
417 heikki.linnakangas 3243 GIC 40 : if (XLogCheckpointNeeded(readSegNo))
3244 37 : RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
3245 : }
3246 : }
417 heikki.linnakangas 3247 ECB :
417 heikki.linnakangas 3248 CBC 126 : close(readFile);
417 heikki.linnakangas 3249 GIC 126 : readFile = -1;
3250 126 : readSource = XLOG_FROM_ANY;
3251 : }
3252 :
3253 82351 : XLByteToSeg(targetPagePtr, readSegNo, wal_segment_size);
417 heikki.linnakangas 3254 ECB :
417 heikki.linnakangas 3255 CBC 82355 : retry:
3256 : /* See if we need to retrieve more data */
417 heikki.linnakangas 3257 GIC 82355 : if (readFile < 0 ||
3258 80757 : (readSource == XLOG_FROM_STREAM &&
3259 69133 : flushedUpto < targetPagePtr + reqLen))
3260 : {
367 tmunro 3261 CBC 28583 : if (readFile >= 0 &&
367 tmunro 3262 GIC 26985 : xlogreader->nonblocking &&
367 tmunro 3263 CBC 13320 : readSource == XLOG_FROM_STREAM &&
367 tmunro 3264 GIC 13320 : flushedUpto < targetPagePtr + reqLen)
367 tmunro 3265 CBC 13320 : return XLREAD_WOULDBLOCK;
367 tmunro 3266 ECB :
367 tmunro 3267 CBC 15232 : switch (WaitForWALToBecomeAvailable(targetPagePtr + reqLen,
367 tmunro 3268 GIC 15263 : private->randAccess,
3269 15263 : private->fetching_ckpt,
3270 : targetRecPtr,
367 tmunro 3271 ECB : private->replayTLI,
3272 : xlogreader->EndRecPtr,
367 tmunro 3273 CBC 15263 : xlogreader->nonblocking))
3274 : {
367 tmunro 3275 GIC 84 : case XLREAD_WOULDBLOCK:
367 tmunro 3276 CBC 84 : return XLREAD_WOULDBLOCK;
367 tmunro 3277 GIC 30 : case XLREAD_FAIL:
367 tmunro 3278 CBC 30 : if (readFile >= 0)
367 tmunro 3279 UIC 0 : close(readFile);
367 tmunro 3280 CBC 30 : readFile = -1;
3281 30 : readLen = 0;
3282 30 : readSource = XLOG_FROM_ANY;
367 tmunro 3283 GIC 30 : return XLREAD_FAIL;
367 tmunro 3284 CBC 15118 : case XLREAD_SUCCESS:
3285 15118 : break;
417 heikki.linnakangas 3286 ECB : }
3287 : }
3288 :
3289 : /*
3290 : * At this point, we have the right segment open and if we're streaming we
3291 : * know the requested record is in it.
3292 : */
417 heikki.linnakangas 3293 GIC 68890 : Assert(readFile != -1);
3294 :
3295 : /*
417 heikki.linnakangas 3296 ECB : * If the current segment is being streamed from the primary, calculate
3297 : * how much of the current page we have received already. We know the
3298 : * requested record has been received, but this is for the benefit of
3299 : * future calls, to allow quick exit at the top of this function.
3300 : */
417 heikki.linnakangas 3301 CBC 68890 : if (readSource == XLOG_FROM_STREAM)
417 heikki.linnakangas 3302 EUB : {
417 heikki.linnakangas 3303 CBC 55862 : if (((targetPagePtr) / XLOG_BLCKSZ) != (flushedUpto / XLOG_BLCKSZ))
3304 42466 : readLen = XLOG_BLCKSZ;
417 heikki.linnakangas 3305 ECB : else
417 heikki.linnakangas 3306 CBC 13396 : readLen = XLogSegmentOffset(flushedUpto, wal_segment_size) -
417 heikki.linnakangas 3307 ECB : targetPageOff;
3308 : }
3309 : else
417 heikki.linnakangas 3310 GIC 13028 : readLen = XLOG_BLCKSZ;
3311 :
3312 : /* Read the requested page */
3313 68890 : readOff = targetPageOff;
3314 :
3315 68890 : pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
192 tmunro 3316 CBC 68890 : r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
417 heikki.linnakangas 3317 GIC 68890 : if (r != XLOG_BLCKSZ)
3318 : {
3319 : char fname[MAXFNAMELEN];
417 heikki.linnakangas 3320 UIC 0 : int save_errno = errno;
3321 :
3322 0 : pgstat_report_wait_end();
3323 0 : XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
417 heikki.linnakangas 3324 LBC 0 : if (r < 0)
3325 : {
3326 0 : errno = save_errno;
3327 0 : ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
3328 : (errcode_for_file_access(),
3329 : errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: %m",
3330 : fname, LSN_FORMAT_ARGS(targetPagePtr),
3331 : readOff)));
3332 : }
3333 : else
3334 0 : ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
3335 : (errcode(ERRCODE_DATA_CORRUPTED),
3336 : errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: read %d of %zu",
3337 : fname, LSN_FORMAT_ARGS(targetPagePtr),
3338 : readOff, r, (Size) XLOG_BLCKSZ)));
417 heikki.linnakangas 3339 UIC 0 : goto next_record_is_invalid;
417 heikki.linnakangas 3340 ECB : }
417 heikki.linnakangas 3341 CBC 68890 : pgstat_report_wait_end();
417 heikki.linnakangas 3342 ECB :
417 heikki.linnakangas 3343 GIC 68890 : Assert(targetSegNo == readSegNo);
3344 68890 : Assert(targetPageOff == readOff);
417 heikki.linnakangas 3345 GBC 68890 : Assert(reqLen <= readLen);
3346 :
3347 68890 : xlogreader->seg.ws_tli = curFileTLI;
417 heikki.linnakangas 3348 EUB :
3349 : /*
3350 : * Check the page header immediately, so that we can retry immediately if
3351 : * it's not valid. This may seem unnecessary, because ReadPageInternal()
3352 : * validates the page header anyway, and would propagate the failure up to
3353 : * ReadRecord(), which would retry. However, there's a corner case with
3354 : * continuation records, if a record is split across two pages such that
3355 : * we would need to read the two pages from different sources. For
3356 : * example, imagine a scenario where a streaming replica is started up,
3357 : * and replay reaches a record that's split across two WAL segments. The
3358 : * first page is only available locally, in pg_wal, because it's already
3359 : * been recycled on the primary. The second page, however, is not present
3360 : * in pg_wal, and we should stream it from the primary. There is a
3361 : * recycled WAL segment present in pg_wal, with garbage contents, however.
3362 : * We would read the first page from the local WAL segment, but when
3363 : * reading the second page, we would read the bogus, recycled, WAL
3364 : * segment. If we didn't catch that case here, we would never recover,
3365 : * because ReadRecord() would retry reading the whole record from the
417 heikki.linnakangas 3366 ECB : * beginning.
3367 : *
3368 : * Of course, this only catches errors in the page header, which is what
3369 : * happens in the case of a recycled WAL segment. Other kinds of errors or
3370 : * corruption still has the same problem. But this at least fixes the
3371 : * common case, which can happen as part of normal operation.
3372 : *
3373 : * Validating the page header is cheap enough that doing it twice
3374 : * shouldn't be a big deal from a performance point of view.
3375 : *
3376 : * When not in standby mode, an invalid page header should cause recovery
3377 : * to end, not retry reading the page, so we don't need to validate the
3378 : * page header here for the retry. Instead, ReadPageInternal() is
3379 : * responsible for the validation.
3380 : */
417 heikki.linnakangas 3381 GIC 68890 : if (StandbyMode &&
3382 58447 : !XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
3383 : {
3384 : /*
3385 : * Emit this error right now then retry this page immediately. Use
3386 : * errmsg_internal() because the message was already translated.
3387 : */
3388 6 : if (xlogreader->errormsg_buf[0])
3389 6 : ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
3390 : (errmsg_internal("%s", xlogreader->errormsg_buf)));
3391 :
3392 : /* reset any error XLogReaderValidatePageHeader() might have set */
218 tmunro 3393 6 : XLogReaderResetError(xlogreader);
417 heikki.linnakangas 3394 6 : goto next_record_is_invalid;
3395 : }
3396 :
3397 68884 : return readLen;
3398 :
3399 6 : next_record_is_invalid:
3400 :
3401 : /*
3402 : * If we're reading ahead, give up fast. Retries and error reporting will
3403 : * be handled by a later read when recovery catches up to this point.
3404 : */
218 tmunro 3405 6 : if (xlogreader->nonblocking)
218 tmunro 3406 CBC 2 : return XLREAD_WOULDBLOCK;
218 tmunro 3407 ECB :
417 heikki.linnakangas 3408 GIC 4 : lastSourceFailed = true;
3409 :
3410 4 : if (readFile >= 0)
3411 4 : close(readFile);
3412 4 : readFile = -1;
417 heikki.linnakangas 3413 CBC 4 : readLen = 0;
3414 4 : readSource = XLOG_FROM_ANY;
3415 :
3416 : /* In standby-mode, keep trying */
417 heikki.linnakangas 3417 GIC 4 : if (StandbyMode)
417 heikki.linnakangas 3418 CBC 4 : goto retry;
417 heikki.linnakangas 3419 ECB : else
367 tmunro 3420 UIC 0 : return XLREAD_FAIL;
3421 : }
417 heikki.linnakangas 3422 ECB :
3423 : /*
3424 : * Open the WAL segment containing WAL location 'RecPtr'.
3425 : *
3426 : * The segment can be fetched via restore_command, or via walreceiver having
3427 : * streamed the record, or it can already be present in pg_wal. Checking
3428 : * pg_wal is mainly for crash recovery, but it will be polled in standby mode
3429 : * too, in case someone copies a new segment directly to pg_wal. That is not
3430 : * documented or recommended, though.
3431 : *
3432 : * If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should
3433 : * prepare to read WAL starting from RedoStartLSN after this.
3434 : *
3435 : * 'RecPtr' might not point to the beginning of the record we're interested
3436 : * in, it might also point to the page or segment header. In that case,
3437 : * 'tliRecPtr' is the position of the WAL record we're interested in. It is
3438 : * used to decide which timeline to stream the requested WAL from.
3439 : *
3440 : * 'replayLSN' is the current replay LSN, so that if we scan for new
3441 : * timelines, we can reject a switch to a timeline that branched off before
3442 : * this point.
3443 : *
 * If the record is not immediately available, the function returns
 * XLREAD_FAIL if we're not in standby mode. In standby mode, waits for it
 * to become available.
3447 : *
3448 : * When the requested record becomes available, the function opens the file
3449 : * containing it (if not open already), and returns XLREAD_SUCCESS. When end
3450 : * of standby mode is triggered by the user, and there is no more WAL
3451 : * available, returns XLREAD_FAIL.
3452 : *
3453 : * If nonblocking is true, then give up immediately if we can't satisfy the
3454 : * request, returning XLREAD_WOULDBLOCK instead of waiting.
3455 : */
3456 : static XLogPageReadResult
417 heikki.linnakangas 3457 GIC 15263 : WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
3458 : bool fetching_ckpt, XLogRecPtr tliRecPtr,
3459 : TimeLineID replayTLI, XLogRecPtr replayLSN,
3460 : bool nonblocking)
3461 : {
3462 : static TimestampTz last_fail_time = 0;
3463 : TimestampTz now;
3464 15263 : bool streaming_reply_sent = false;
3465 :
3466 : /*-------
3467 : * Standby mode is implemented by a state machine:
3468 : *
3469 : * 1. Read from either archive or pg_wal (XLOG_FROM_ARCHIVE), or just
3470 : * pg_wal (XLOG_FROM_PG_WAL)
3471 : * 2. Check for promotion trigger request
3472 : * 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
3473 : * 4. Rescan timelines
3474 : * 5. Sleep wal_retrieve_retry_interval milliseconds, and loop back to 1.
3475 : *
3476 : * Failure to read from the current source advances the state machine to
3477 : * the next state.
3478 : *
3479 : * 'currentSource' indicates the current state. There are no currentSource
3480 : * values for "check trigger", "rescan timelines", and "sleep" states,
3481 : * those actions are taken when reading from the previous source fails, as
417 heikki.linnakangas 3482 ECB : * part of advancing to the next state.
3483 : *
3484 : * If standby mode is turned off while reading WAL from stream, we move
3485 : * to XLOG_FROM_ARCHIVE and reset lastSourceFailed, to force fetching
3486 : * the files (which would be required at end of recovery, e.g., timeline
3487 : * history file) from archive or pg_wal. We don't need to kill WAL receiver
3488 : * here because it's already stopped when standby mode is turned off at
3489 : * the end of recovery.
3490 : *-------
3491 : */
417 heikki.linnakangas 3492 GIC 15263 : if (!InArchiveRecovery)
3493 1193 : currentSource = XLOG_FROM_PG_WAL;
3494 14070 : else if (currentSource == XLOG_FROM_ANY ||
3495 13995 : (!StandbyMode && currentSource == XLOG_FROM_STREAM))
3496 : {
3497 75 : lastSourceFailed = false;
3498 75 : currentSource = XLOG_FROM_ARCHIVE;
3499 : }
3500 :
3501 : for (;;)
3502 13317 : {
3503 28580 : XLogSource oldSource = currentSource;
3504 28580 : bool startWalReceiver = false;
3505 :
3506 : /*
3507 : * First check if we failed to read from the current source, and
3508 : * advance the state machine if so. The failure to read might've
3509 : * happened outside this function, e.g when a CRC check fails on a
3510 : * record, or within this loop.
3511 : */
3512 28580 : if (lastSourceFailed)
3513 : {
3514 : /*
3515 : * Don't allow any retry loops to occur during nonblocking
3516 : * readahead. Let the caller process everything that has been
367 tmunro 3517 ECB : * decoded already first.
3518 : */
367 tmunro 3519 CBC 358 : if (nonblocking)
3520 54 : return XLREAD_WOULDBLOCK;
3521 :
417 heikki.linnakangas 3522 304 : switch (currentSource)
417 heikki.linnakangas 3523 ECB : {
417 heikki.linnakangas 3524 GIC 179 : case XLOG_FROM_ARCHIVE:
3525 : case XLOG_FROM_PG_WAL:
3526 :
417 heikki.linnakangas 3527 ECB : /*
3528 : * Check to see if promotion is requested. Note that we do
3529 : * this only after failure, so when you promote, we still
3530 : * finish replaying as much as we can from archive and
3531 : * pg_wal before failover.
3532 : */
417 heikki.linnakangas 3533 GIC 179 : if (StandbyMode && CheckForStandbyTrigger())
3534 : {
3535 16 : XLogShutdownWalRcv();
367 tmunro 3536 16 : return XLREAD_FAIL;
417 heikki.linnakangas 3537 ECB : }
3538 :
3539 : /*
3540 : * Not in standby mode, and we've now tried the archive
3541 : * and pg_wal.
3542 : */
417 heikki.linnakangas 3543 GIC 163 : if (!StandbyMode)
367 tmunro 3544 CBC 14 : return XLREAD_FAIL;
417 heikki.linnakangas 3545 ECB :
3546 : /*
3547 : * Move to XLOG_FROM_STREAM state, and set to start a
3548 : * walreceiver if necessary.
3549 : */
417 heikki.linnakangas 3550 GIC 149 : currentSource = XLOG_FROM_STREAM;
3551 149 : startWalReceiver = true;
3552 149 : break;
3553 :
3554 125 : case XLOG_FROM_STREAM:
3555 :
3556 : /*
3557 : * Failure while streaming. Most likely, we got here
417 heikki.linnakangas 3558 ECB : * because streaming replication was terminated, or
3559 : * promotion was triggered. But we also get here if we
3560 : * find an invalid record in the WAL streamed from the
3561 : * primary, in which case something is seriously wrong.
3562 : * There's little chance that the problem will just go
3563 : * away, but PANIC is not good for availability either,
3564 : * especially in hot standby mode. So, we treat that the
3565 : * same as disconnection, and retry from archive/pg_wal
3566 : * again. The WAL in the archive should be identical to
3567 : * what was streamed, so it's unlikely that it helps, but
3568 : * one can hope...
3569 : */
3570 :
3571 : /*
3572 : * We should be able to move to XLOG_FROM_STREAM only in
3573 : * standby mode.
3574 : */
417 heikki.linnakangas 3575 CBC 125 : Assert(StandbyMode);
417 heikki.linnakangas 3576 ECB :
3577 : /*
3578 : * Before we leave XLOG_FROM_STREAM state, make sure that
3579 : * walreceiver is not active, so that it won't overwrite
3580 : * WAL that we restore from archive.
3581 : */
206 noah 3582 GIC 125 : XLogShutdownWalRcv();
3583 :
3584 : /*
3585 : * Before we sleep, re-scan for possible new timelines if
3586 : * we were requested to recover to the latest timeline.
3587 : */
417 heikki.linnakangas 3588 125 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
3589 : {
3590 125 : if (rescanLatestTimeLine(replayTLI, replayLSN))
3591 : {
3592 5 : currentSource = XLOG_FROM_ARCHIVE;
3593 5 : break;
3594 : }
3595 : }
3596 :
3597 : /*
3598 : * XLOG_FROM_STREAM is the last state in our state
3599 : * machine, so we've exhausted all the options for
417 heikki.linnakangas 3600 ECB : * obtaining the requested WAL. We're going to loop back
3601 : * and retry from the archive, but if it hasn't been long
3602 : * since last attempt, sleep wal_retrieve_retry_interval
3603 : * milliseconds to avoid busy-waiting.
3604 : */
417 heikki.linnakangas 3605 GIC 120 : now = GetCurrentTimestamp();
3606 120 : if (!TimestampDifferenceExceeds(last_fail_time, now,
417 heikki.linnakangas 3607 ECB : wal_retrieve_retry_interval))
3608 : {
3609 : long wait_time;
3610 :
417 heikki.linnakangas 3611 GIC 102 : wait_time = wal_retrieve_retry_interval -
3612 51 : TimestampDifferenceMilliseconds(last_fail_time, now);
417 heikki.linnakangas 3613 ECB :
417 heikki.linnakangas 3614 GIC 51 : elog(LOG, "waiting for WAL to become available at %X/%X",
417 heikki.linnakangas 3615 ECB : LSN_FORMAT_ARGS(RecPtr));
3616 :
131 tgl 3617 : /* Do background tasks that might benefit us later. */
131 tgl 3618 CBC 51 : KnownAssignedTransactionIdsIdleMaintenance();
3619 :
417 heikki.linnakangas 3620 GIC 51 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
3621 : WL_LATCH_SET | WL_TIMEOUT |
3622 : WL_EXIT_ON_PM_DEATH,
3623 : wait_time,
3624 : WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL);
3625 51 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
3626 51 : now = GetCurrentTimestamp();
3627 :
3628 : /* Handle interrupt signals of startup process */
3629 51 : HandleStartupProcInterrupts();
417 heikki.linnakangas 3630 ECB : }
417 heikki.linnakangas 3631 CBC 113 : last_fail_time = now;
417 heikki.linnakangas 3632 GIC 113 : currentSource = XLOG_FROM_ARCHIVE;
3633 113 : break;
3634 :
417 heikki.linnakangas 3635 UIC 0 : default:
417 heikki.linnakangas 3636 LBC 0 : elog(ERROR, "unexpected WAL source %d", currentSource);
417 heikki.linnakangas 3637 ECB : }
3638 : }
417 heikki.linnakangas 3639 CBC 28222 : else if (currentSource == XLOG_FROM_PG_WAL)
3640 : {
3641 : /*
3642 : * We just successfully read a file in pg_wal. We prefer files in
417 heikki.linnakangas 3643 ECB : * the archive over ones in pg_wal, so try the next file again
3644 : * from the archive first.
3645 : */
417 heikki.linnakangas 3646 GIC 1190 : if (InArchiveRecovery)
417 heikki.linnakangas 3647 UIC 0 : currentSource = XLOG_FROM_ARCHIVE;
3648 : }
3649 :
417 heikki.linnakangas 3650 CBC 28489 : if (currentSource != oldSource)
3651 267 : elog(DEBUG2, "switched WAL source from %s to %s after %s",
3652 : xlogSourceNames[oldSource], xlogSourceNames[currentSource],
3653 : lastSourceFailed ? "failure" : "success");
417 heikki.linnakangas 3654 ECB :
3655 : /*
3656 : * We've now handled possible failure. Try to read from the chosen
3657 : * source.
3658 : */
417 heikki.linnakangas 3659 GIC 28489 : lastSourceFailed = false;
417 heikki.linnakangas 3660 EUB :
417 heikki.linnakangas 3661 GBC 28489 : switch (currentSource)
3662 : {
417 heikki.linnakangas 3663 GIC 1542 : case XLOG_FROM_ARCHIVE:
417 heikki.linnakangas 3664 ECB : case XLOG_FROM_PG_WAL:
3665 :
3666 : /*
3667 : * WAL receiver must not be running when reading WAL from
3668 : * archive or pg_wal.
3669 : */
417 heikki.linnakangas 3670 GIC 1542 : Assert(!WalRcvStreaming());
417 heikki.linnakangas 3671 ECB :
417 heikki.linnakangas 3672 EUB : /* Close any old file we might have open. */
417 heikki.linnakangas 3673 GIC 1542 : if (readFile >= 0)
3674 : {
417 heikki.linnakangas 3675 CBC 67 : close(readFile);
3676 67 : readFile = -1;
3677 : }
3678 : /* Reset curFileTLI if random fetch. */
417 heikki.linnakangas 3679 GIC 1542 : if (randAccess)
3680 1293 : curFileTLI = 0;
3681 :
3682 : /*
3683 : * Try to restore the file from archive, or read an existing
417 heikki.linnakangas 3684 ECB : * file from pg_wal.
3685 : */
417 heikki.linnakangas 3686 CBC 1542 : readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,
417 heikki.linnakangas 3687 GIC 1542 : currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
417 heikki.linnakangas 3688 ECB : currentSource);
417 heikki.linnakangas 3689 GIC 1542 : if (readFile >= 0)
367 tmunro 3690 1404 : return XLREAD_SUCCESS; /* success! */
3691 :
3692 : /*
3693 : * Nope, not found in archive or pg_wal.
3694 : */
417 heikki.linnakangas 3695 CBC 138 : lastSourceFailed = true;
417 heikki.linnakangas 3696 GIC 138 : break;
3697 :
417 heikki.linnakangas 3698 CBC 26947 : case XLOG_FROM_STREAM:
3699 : {
417 heikki.linnakangas 3700 ECB : bool havedata;
3701 :
3702 : /*
3703 : * We should be able to move to XLOG_FROM_STREAM only in
3704 : * standby mode.
3705 : */
417 heikki.linnakangas 3706 GIC 26947 : Assert(StandbyMode);
3707 :
3708 : /*
3709 : * First, shutdown walreceiver if its restart has been
3710 : * requested -- but no point if we're already slated for
417 heikki.linnakangas 3711 ECB : * starting it.
3712 : */
417 heikki.linnakangas 3713 GIC 26947 : if (pendingWalRcvRestart && !startWalReceiver)
417 heikki.linnakangas 3714 ECB : {
417 heikki.linnakangas 3715 LBC 0 : XLogShutdownWalRcv();
3716 :
3717 : /*
3718 : * Re-scan for possible new timelines if we were
3719 : * requested to recover to the latest timeline.
417 heikki.linnakangas 3720 ECB : */
417 heikki.linnakangas 3721 LBC 0 : if (recoveryTargetTimeLineGoal ==
3722 : RECOVERY_TARGET_TIMELINE_LATEST)
3723 0 : rescanLatestTimeLine(replayTLI, replayLSN);
3724 :
417 heikki.linnakangas 3725 UIC 0 : startWalReceiver = true;
3726 : }
417 heikki.linnakangas 3727 GIC 26947 : pendingWalRcvRestart = false;
3728 :
3729 : /*
3730 : * Launch walreceiver if needed.
417 heikki.linnakangas 3731 ECB : *
3732 : * If fetching_ckpt is true, RecPtr points to the initial
3733 : * checkpoint location. In that case, we use RedoStartLSN
3734 : * as the streaming start position instead of RecPtr, so
3735 : * that when we later jump backwards to start redo at
3736 : * RedoStartLSN, we will have the logs streamed already.
3737 : */
417 heikki.linnakangas 3738 CBC 26947 : if (startWalReceiver &&
417 heikki.linnakangas 3739 GIC 149 : PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
417 heikki.linnakangas 3740 EUB : {
3741 : XLogRecPtr ptr;
3742 : TimeLineID tli;
3743 :
417 heikki.linnakangas 3744 GIC 125 : if (fetching_ckpt)
3745 : {
417 heikki.linnakangas 3746 UBC 0 : ptr = RedoStartLSN;
417 heikki.linnakangas 3747 UIC 0 : tli = RedoStartTLI;
417 heikki.linnakangas 3748 EUB : }
3749 : else
3750 : {
417 heikki.linnakangas 3751 GIC 125 : ptr = RecPtr;
417 heikki.linnakangas 3752 ECB :
3753 : /*
3754 : * Use the record begin position to determine the
3755 : * TLI, rather than the position we're reading.
3756 : */
417 heikki.linnakangas 3757 GIC 125 : tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
3758 :
3759 125 : if (curFileTLI > 0 && tli < curFileTLI)
417 heikki.linnakangas 3760 UIC 0 : elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
3761 : LSN_FORMAT_ARGS(tliRecPtr),
3762 : tli, curFileTLI);
417 heikki.linnakangas 3763 ECB : }
417 heikki.linnakangas 3764 CBC 125 : curFileTLI = tli;
417 heikki.linnakangas 3765 GIC 125 : SetInstallXLogFileSegmentActive();
3766 125 : RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
3767 : PrimarySlotName,
3768 : wal_receiver_create_temp_slot);
417 heikki.linnakangas 3769 CBC 125 : flushedUpto = 0;
3770 : }
417 heikki.linnakangas 3771 EUB :
3772 : /*
3773 : * Check if WAL receiver is active or wait to start up.
3774 : */
417 heikki.linnakangas 3775 GIC 26947 : if (!WalRcvStreaming())
417 heikki.linnakangas 3776 ECB : {
417 heikki.linnakangas 3777 GIC 94 : lastSourceFailed = true;
3778 94 : break;
3779 : }
3780 :
3781 : /*
417 heikki.linnakangas 3782 ECB : * Walreceiver is active, so see if new data has arrived.
3783 : *
3784 : * We only advance XLogReceiptTime when we obtain fresh
417 heikki.linnakangas 3785 EUB : * WAL from walreceiver and observe that we had already
3786 : * processed everything before the most recent "chunk"
3787 : * that it flushed to disk. In steady state where we are
3788 : * keeping up with the incoming data, XLogReceiptTime will
417 heikki.linnakangas 3789 ECB : * be updated on each cycle. When we are behind,
3790 : * XLogReceiptTime will not advance, so the grace time
3791 : * allotted to conflicting queries will decrease.
3792 : */
417 heikki.linnakangas 3793 GIC 26853 : if (RecPtr < flushedUpto)
417 heikki.linnakangas 3794 CBC 148 : havedata = true;
3795 : else
3796 : {
3797 : XLogRecPtr latestChunkStart;
3798 :
417 heikki.linnakangas 3799 GIC 26705 : flushedUpto = GetWalRcvFlushRecPtr(&latestChunkStart, &receiveTLI);
417 heikki.linnakangas 3800 CBC 26705 : if (RecPtr < flushedUpto && receiveTLI == curFileTLI)
3801 : {
3802 13700 : havedata = true;
3803 13700 : if (latestChunkStart <= RecPtr)
3804 : {
417 heikki.linnakangas 3805 GIC 12659 : XLogReceiptTime = GetCurrentTimestamp();
3806 12659 : SetCurrentChunkStartTime(XLogReceiptTime);
3807 : }
3808 : }
3809 : else
3810 13005 : havedata = false;
3811 : }
3812 26853 : if (havedata)
3813 : {
3814 : /*
3815 : * Great, streamed far enough. Open the file if it's
3816 : * not open already. Also read the timeline history
3817 : * file if we haven't initialized timeline history
417 heikki.linnakangas 3818 ECB : * yet; it should be streamed over and present in
3819 : * pg_wal by now. Use XLOG_FROM_STREAM so that source
3820 : * info is set correctly and XLogReceiptTime isn't
3821 : * changed.
3822 : *
3823 : * NB: We must set readTimeLineHistory based on
3824 : * recoveryTargetTLI, not receiveTLI. Normally they'll
3825 : * be the same, but if recovery_target_timeline is
3826 : * 'latest' and archiving is configured, then it's
3827 : * possible that we managed to retrieve one or more
3828 : * new timeline history files from the archive,
3829 : * updating recoveryTargetTLI.
3830 : */
417 heikki.linnakangas 3831 CBC 13848 : if (readFile < 0)
3832 : {
417 heikki.linnakangas 3833 GIC 134 : if (!expectedTLEs)
417 heikki.linnakangas 3834 UIC 0 : expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
417 heikki.linnakangas 3835 CBC 134 : readFile = XLogFileRead(readSegNo, PANIC,
3836 : receiveTLI,
417 heikki.linnakangas 3837 ECB : XLOG_FROM_STREAM, false);
417 heikki.linnakangas 3838 GIC 134 : Assert(readFile >= 0);
3839 : }
3840 : else
3841 : {
3842 : /* just make sure source info is correct... */
3843 13714 : readSource = XLOG_FROM_STREAM;
3844 13714 : XLogReceiptSource = XLOG_FROM_STREAM;
367 tmunro 3845 13714 : return XLREAD_SUCCESS;
3846 : }
417 heikki.linnakangas 3847 134 : break;
3848 : }
3849 :
3850 : /* In nonblocking mode, return rather than sleeping. */
367 tmunro 3851 13005 : if (nonblocking)
3852 30 : return XLREAD_WOULDBLOCK;
3853 :
3854 : /*
3855 : * Data not here yet. Check for trigger, then wait for
417 heikki.linnakangas 3856 ECB : * walreceiver to wake us up when new WAL arrives.
3857 : */
417 heikki.linnakangas 3858 CBC 12975 : if (CheckForStandbyTrigger())
417 heikki.linnakangas 3859 EUB : {
417 heikki.linnakangas 3860 ECB : /*
3861 : * Note that we don't return XLREAD_FAIL immediately
3862 : * here. After being triggered, we still want to
367 tmunro 3863 : * replay all the WAL that was already streamed. It's
3864 : * in pg_wal now, so we just treat this as a failure,
3865 : * and the state machine will move on to replay the
3866 : * streamed WAL from pg_wal, and then recheck the
3867 : * trigger and exit replay.
417 heikki.linnakangas 3868 : */
417 heikki.linnakangas 3869 CBC 31 : lastSourceFailed = true;
3870 31 : break;
3871 : }
417 heikki.linnakangas 3872 ECB :
3873 : /*
3874 : * Since we have replayed everything we have received so
3875 : * far and are about to start waiting for more WAL, let's
3876 : * tell the upstream server our replay location now so
3877 : * that pg_stat_replication doesn't show stale
3878 : * information.
3879 : */
417 heikki.linnakangas 3880 GIC 12944 : if (!streaming_reply_sent)
3881 : {
3882 11679 : WalRcvForceReply();
417 heikki.linnakangas 3883 CBC 11679 : streaming_reply_sent = true;
3884 : }
3885 :
3886 : /* Do any background tasks that might benefit us later. */
131 tgl 3887 GIC 12944 : KnownAssignedTransactionIdsIdleMaintenance();
3888 :
3889 : /* Update pg_stat_recovery_prefetch before sleeping. */
367 tmunro 3890 12944 : XLogPrefetcherComputeStats(xlogprefetcher);
3891 :
3892 : /*
3893 : * Wait for more WAL to arrive, when we will be woken
3894 : * immediately by the WAL receiver.
3895 : */
417 heikki.linnakangas 3896 12944 : (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
3897 : WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
3898 : -1L,
3899 : WAIT_EVENT_RECOVERY_WAL_STREAM);
3900 12944 : ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
3901 12944 : break;
3902 : }
3903 :
417 heikki.linnakangas 3904 LBC 0 : default:
417 heikki.linnakangas 3905 UIC 0 : elog(ERROR, "unexpected WAL source %d", currentSource);
417 heikki.linnakangas 3906 ECB : }
3907 :
3908 : /*
3909 : * Check for recovery pause here so that we can confirm more quickly
3910 : * that a requested pause has actually taken effect.
3911 : */
417 heikki.linnakangas 3912 GIC 13341 : if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
3913 : RECOVERY_NOT_PAUSED)
417 heikki.linnakangas 3914 CBC 2 : recoveryPausesHere(false);
3915 :
3916 : /*
3917 : * This possibly-long loop needs to handle interrupts of startup
3918 : * process.
3919 : */
3920 13341 : HandleStartupProcInterrupts();
3921 : }
3922 :
3923 : return XLREAD_FAIL; /* not reached */
417 heikki.linnakangas 3924 ECB : }
3925 :
3926 :
3927 : /*
417 heikki.linnakangas 3928 EUB : * Determine what log level should be used to report a corrupt WAL record
3929 : * in the current WAL page, previously read by XLogPageRead().
3930 : *
3931 : * 'emode' is the error mode that would be used to report a file-not-found
3932 : * or legitimate end-of-WAL situation. Generally, we use it as-is, but if
3933 : * we're retrying the exact same record that we've tried previously, only
3934 : * complain the first time to keep the noise down. However, we only do when
3935 : * reading from pg_wal, because we don't expect any invalid records in archive
417 heikki.linnakangas 3936 ECB : * or in records streamed from the primary. Files in the archive should be complete,
3937 : * and we should never hit the end of WAL because we stop and wait for more WAL
3938 : * to arrive before replaying it.
3939 : *
3940 : * NOTE: This function remembers the RecPtr value it was last called with,
3941 : * to suppress repeated messages about the same record. Only call this when
3942 : * you are about to ereport(), or you might cause a later message to be
3943 : * erroneously suppressed.
3944 : */
3945 : static int
417 heikki.linnakangas 3946 GIC 187 : emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
3947 : {
3948 : static XLogRecPtr lastComplaint = 0;
3949 :
3950 187 : if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
3951 : {
3952 185 : if (RecPtr == lastComplaint)
3953 49 : emode = DEBUG1;
3954 : else
3955 136 : lastComplaint = RecPtr;
3956 : }
3957 187 : return emode;
3958 : }
3959 :
3960 :
3961 : /*
3962 : * Subroutine to try to fetch and validate a prior checkpoint record.
3963 : */
3964 : static XLogRecord *
367 tmunro 3965 1176 : ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher, XLogRecPtr RecPtr,
3966 : TimeLineID replayTLI)
417 heikki.linnakangas 3967 ECB : {
3968 : XLogRecord *record;
3969 : uint8 info;
3970 :
417 heikki.linnakangas 3971 CBC 1176 : Assert(xlogreader != NULL);
3972 :
3973 1176 : if (!XRecOffIsValid(RecPtr))
417 heikki.linnakangas 3974 ECB : {
263 fujii 3975 UNC 0 : ereport(LOG,
3976 : (errmsg("invalid checkpoint location")));
417 heikki.linnakangas 3977 UIC 0 : return NULL;
3978 : }
3979 :
367 tmunro 3980 CBC 1176 : XLogPrefetcherBeginRead(xlogprefetcher, RecPtr);
367 tmunro 3981 GIC 1176 : record = ReadRecord(xlogprefetcher, LOG, true, replayTLI);
417 heikki.linnakangas 3982 ECB :
417 heikki.linnakangas 3983 GIC 1176 : if (record == NULL)
417 heikki.linnakangas 3984 EUB : {
263 fujii 3985 UNC 0 : ereport(LOG,
3986 : (errmsg("invalid checkpoint record")));
417 heikki.linnakangas 3987 UIC 0 : return NULL;
417 heikki.linnakangas 3988 EUB : }
417 heikki.linnakangas 3989 GIC 1176 : if (record->xl_rmid != RM_XLOG_ID)
417 heikki.linnakangas 3990 EUB : {
263 fujii 3991 UNC 0 : ereport(LOG,
3992 : (errmsg("invalid resource manager ID in checkpoint record")));
417 heikki.linnakangas 3993 UBC 0 : return NULL;
3994 : }
417 heikki.linnakangas 3995 GBC 1176 : info = record->xl_info & ~XLR_INFO_MASK;
417 heikki.linnakangas 3996 GIC 1176 : if (info != XLOG_CHECKPOINT_SHUTDOWN &&
417 heikki.linnakangas 3997 ECB : info != XLOG_CHECKPOINT_ONLINE)
3998 : {
263 fujii 3999 UNC 0 : ereport(LOG,
4000 : (errmsg("invalid xl_info in checkpoint record")));
417 heikki.linnakangas 4001 UIC 0 : return NULL;
4002 : }
417 heikki.linnakangas 4003 GIC 1176 : if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
4004 : {
263 fujii 4005 UNC 0 : ereport(LOG,
4006 : (errmsg("invalid length of checkpoint record")));
417 heikki.linnakangas 4007 UIC 0 : return NULL;
4008 : }
417 heikki.linnakangas 4009 GIC 1176 : return record;
417 heikki.linnakangas 4010 ECB : }
4011 :
4012 : /*
4013 : * Scan for new timelines that might have appeared in the archive since we
4014 : * started recovery.
4015 : *
4016 : * If there are any, the function changes recovery target TLI to the latest
4017 : * one and returns 'true'.
4018 : */
4019 : static bool
417 heikki.linnakangas 4020 GIC 125 : rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
417 heikki.linnakangas 4021 ECB : {
4022 : List *newExpectedTLEs;
4023 : bool found;
4024 : ListCell *cell;
4025 : TimeLineID newtarget;
417 heikki.linnakangas 4026 GIC 125 : TimeLineID oldtarget = recoveryTargetTLI;
417 heikki.linnakangas 4027 CBC 125 : TimeLineHistoryEntry *currentTle = NULL;
4028 :
417 heikki.linnakangas 4029 GBC 125 : newtarget = findNewestTimeLine(recoveryTargetTLI);
417 heikki.linnakangas 4030 GIC 125 : if (newtarget == recoveryTargetTLI)
4031 : {
4032 : /* No new timelines found */
417 heikki.linnakangas 4033 GBC 120 : return false;
4034 : }
4035 :
4036 : /*
4037 : * Determine the list of expected TLIs for the new TLI
4038 : */
4039 :
417 heikki.linnakangas 4040 GIC 5 : newExpectedTLEs = readTimeLineHistory(newtarget);
417 heikki.linnakangas 4041 ECB :
4042 : /*
417 heikki.linnakangas 4043 EUB : * If the current timeline is not part of the history of the new timeline,
4044 : * we cannot proceed to it.
4045 : */
417 heikki.linnakangas 4046 GIC 5 : found = false;
4047 10 : foreach(cell, newExpectedTLEs)
417 heikki.linnakangas 4048 EUB : {
417 heikki.linnakangas 4049 GIC 10 : currentTle = (TimeLineHistoryEntry *) lfirst(cell);
4050 :
4051 10 : if (currentTle->tli == recoveryTargetTLI)
417 heikki.linnakangas 4052 ECB : {
417 heikki.linnakangas 4053 CBC 5 : found = true;
4054 5 : break;
4055 : }
4056 : }
417 heikki.linnakangas 4057 GIC 5 : if (!found)
4058 : {
417 heikki.linnakangas 4059 UIC 0 : ereport(LOG,
417 heikki.linnakangas 4060 ECB : (errmsg("new timeline %u is not a child of database system timeline %u",
4061 : newtarget,
4062 : replayTLI)));
417 heikki.linnakangas 4063 UIC 0 : return false;
4064 : }
4065 :
417 heikki.linnakangas 4066 ECB : /*
4067 : * The current timeline was found in the history file, but check that the
4068 : * next timeline was forked off from it *after* the current recovery
4069 : * location.
4070 : */
417 heikki.linnakangas 4071 GIC 5 : if (currentTle->end < replayLSN)
4072 : {
417 heikki.linnakangas 4073 UIC 0 : ereport(LOG,
4074 : (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
4075 : newtarget,
4076 : replayTLI,
417 heikki.linnakangas 4077 ECB : LSN_FORMAT_ARGS(replayLSN))));
417 heikki.linnakangas 4078 UIC 0 : return false;
4079 : }
4080 :
4081 : /* The new timeline history seems valid. Switch target */
417 heikki.linnakangas 4082 GIC 5 : recoveryTargetTLI = newtarget;
4083 5 : list_free_deep(expectedTLEs);
4084 5 : expectedTLEs = newExpectedTLEs;
417 heikki.linnakangas 4085 ECB :
4086 : /*
4087 : * As in StartupXLOG(), try to ensure we have all the history files
4088 : * between the old target and new target in pg_wal.
4089 : */
417 heikki.linnakangas 4090 GIC 5 : restoreTimeLineHistoryFiles(oldtarget + 1, newtarget);
417 heikki.linnakangas 4091 ECB :
417 heikki.linnakangas 4092 GIC 5 : ereport(LOG,
417 heikki.linnakangas 4093 ECB : (errmsg("new target timeline is %u",
4094 : recoveryTargetTLI)));
4095 :
417 heikki.linnakangas 4096 GIC 5 : return true;
4097 : }
4098 :
417 heikki.linnakangas 4099 ECB :
4100 : /*
4101 : * Open a logfile segment for reading (during recovery).
4102 : *
4103 : * If source == XLOG_FROM_ARCHIVE, the segment is retrieved from archive.
4104 : * Otherwise, it's assumed to be already available in pg_wal.
4105 : */
4106 : static int
417 heikki.linnakangas 4107 GBC 2018 : XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
417 heikki.linnakangas 4108 EUB : XLogSource source, bool notfoundOk)
4109 : {
4110 : char xlogfname[MAXFNAMELEN];
4111 : char activitymsg[MAXFNAMELEN + 16];
4112 : char path[MAXPGPATH];
4113 : int fd;
4114 :
417 heikki.linnakangas 4115 CBC 2018 : XLogFileName(xlogfname, tli, segno, wal_segment_size);
4116 :
4117 2018 : switch (source)
417 heikki.linnakangas 4118 ECB : {
417 heikki.linnakangas 4119 GIC 359 : case XLOG_FROM_ARCHIVE:
4120 : /* Report recovery progress in PS display */
4121 359 : snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
4122 : xlogfname);
417 heikki.linnakangas 4123 CBC 359 : set_ps_display(activitymsg);
4124 :
417 heikki.linnakangas 4125 GIC 359 : if (!RestoreArchivedFile(path, xlogfname,
417 heikki.linnakangas 4126 ECB : "RECOVERYXLOG",
4127 : wal_segment_size,
4128 : InRedo))
417 heikki.linnakangas 4129 GIC 335 : return -1;
417 heikki.linnakangas 4130 CBC 24 : break;
4131 :
417 heikki.linnakangas 4132 GIC 1659 : case XLOG_FROM_PG_WAL:
417 heikki.linnakangas 4133 ECB : case XLOG_FROM_STREAM:
417 heikki.linnakangas 4134 GIC 1659 : XLogFilePath(path, tli, segno, wal_segment_size);
417 heikki.linnakangas 4135 CBC 1659 : break;
4136 :
417 heikki.linnakangas 4137 UIC 0 : default:
417 heikki.linnakangas 4138 LBC 0 : elog(ERROR, "invalid XLogFileRead source %d", source);
417 heikki.linnakangas 4139 ECB : }
4140 :
4141 : /*
4142 : * If the segment was fetched from archival storage, replace the existing
4143 : * xlog segment (if any) with the archival version.
4144 : */
417 heikki.linnakangas 4145 GIC 1683 : if (source == XLOG_FROM_ARCHIVE)
417 heikki.linnakangas 4146 ECB : {
417 heikki.linnakangas 4147 GBC 24 : Assert(!IsInstallXLogFileSegmentActive());
417 heikki.linnakangas 4148 GIC 24 : KeepFileRestoredFromArchive(path, xlogfname);
4149 :
417 heikki.linnakangas 4150 ECB : /*
4151 : * Set path to point at the new file in pg_wal.
4152 : */
417 heikki.linnakangas 4153 GIC 24 : snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
4154 : }
4155 :
4156 1683 : fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
4157 1683 : if (fd >= 0)
4158 : {
417 heikki.linnakangas 4159 ECB : /* Success! */
417 heikki.linnakangas 4160 GIC 1538 : curFileTLI = tli;
4161 :
4162 : /* Report recovery progress in PS display */
4163 1538 : snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
4164 : xlogfname);
4165 1538 : set_ps_display(activitymsg);
4166 :
4167 : /* Track source of data in assorted state variables */
4168 1538 : readSource = source;
4169 1538 : XLogReceiptSource = source;
4170 : /* In FROM_STREAM case, caller tracks receipt time, not me */
4171 1538 : if (source != XLOG_FROM_STREAM)
4172 1404 : XLogReceiptTime = GetCurrentTimestamp();
4173 :
4174 1538 : return fd;
4175 : }
4176 145 : if (errno != ENOENT || !notfoundOk) /* unexpected failure? */
417 heikki.linnakangas 4177 UIC 0 : ereport(PANIC,
4178 : (errcode_for_file_access(),
4179 : errmsg("could not open file \"%s\": %m", path)));
417 heikki.linnakangas 4180 GIC 145 : return -1;
4181 : }
4182 :
4183 : /*
417 heikki.linnakangas 4184 ECB : * Open a logfile segment for reading (during recovery).
4185 : *
4186 : * This version searches for the segment with any TLI listed in expectedTLEs.
4187 : */
4188 : static int
417 heikki.linnakangas 4189 CBC 1542 : XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source)
4190 : {
417 heikki.linnakangas 4191 ECB : char path[MAXPGPATH];
4192 : ListCell *cell;
4193 : int fd;
4194 : List *tles;
4195 :
4196 : /*
4197 : * Loop looking for a suitable timeline ID: we might need to read any of
4198 : * the timelines listed in expectedTLEs.
4199 : *
4200 : * We expect curFileTLI on entry to be the TLI of the preceding file in
4201 : * sequence, or 0 if there was no predecessor. We do not allow curFileTLI
4202 : * to go backwards; this prevents us from picking up the wrong file when a
4203 : * parent timeline extends to higher segment numbers than the child we
4204 : * want to read.
4205 : *
4206 : * If we haven't read the timeline history file yet, read it now, so that
4207 : * we know which TLIs to scan. We don't save the list in expectedTLEs,
4208 : * however, unless we actually find a valid segment. That way if there is
4209 : * neither a timeline history file nor a WAL segment in the archive, and
4210 : * streaming replication is set up, we'll read the timeline history file
4211 : * streamed from the primary when we start streaming, instead of
4212 : * recovering with a dummy history generated here.
4213 : */
417 heikki.linnakangas 4214 GIC 1542 : if (expectedTLEs)
4215 366 : tles = expectedTLEs;
4216 : else
417 heikki.linnakangas 4217 CBC 1176 : tles = readTimeLineHistory(recoveryTargetTLI);
417 heikki.linnakangas 4218 ECB :
417 heikki.linnakangas 4219 GIC 1693 : foreach(cell, tles)
4220 : {
417 heikki.linnakangas 4221 CBC 1557 : TimeLineHistoryEntry *hent = (TimeLineHistoryEntry *) lfirst(cell);
417 heikki.linnakangas 4222 GIC 1557 : TimeLineID tli = hent->tli;
417 heikki.linnakangas 4223 ECB :
417 heikki.linnakangas 4224 GIC 1557 : if (tli < curFileTLI)
417 heikki.linnakangas 4225 CBC 2 : break; /* don't bother looking at too-old TLIs */
4226 :
417 heikki.linnakangas 4227 ECB : /*
4228 : * Skip scanning the timeline ID that the logfile segment to read
4229 : * doesn't belong to
4230 : */
417 heikki.linnakangas 4231 GIC 1555 : if (hent->begin != InvalidXLogRecPtr)
4232 : {
4233 48 : XLogSegNo beginseg = 0;
417 heikki.linnakangas 4234 ECB :
417 heikki.linnakangas 4235 GIC 48 : XLByteToSeg(hent->begin, beginseg, wal_segment_size);
417 heikki.linnakangas 4236 ECB :
4237 : /*
4238 : * The logfile segment that doesn't belong to the timeline is
4239 : * older or newer than the segment that the timeline started or
4240 : * ended at, respectively. It's sufficient to check only the
4241 : * starting segment of the timeline here. Since the timelines are
4242 : * scanned in descending order in this loop, any segments newer
4243 : * than the ending segment should belong to newer timeline and
4244 : * have already been read before. So it's not necessary to check
4245 : * the ending segment of the timeline here.
4246 : */
417 heikki.linnakangas 4247 GIC 48 : if (segno < beginseg)
417 heikki.linnakangas 4248 CBC 6 : continue;
417 heikki.linnakangas 4249 ECB : }
4250 :
417 heikki.linnakangas 4251 GIC 1549 : if (source == XLOG_FROM_ANY || source == XLOG_FROM_ARCHIVE)
4252 : {
417 heikki.linnakangas 4253 CBC 359 : fd = XLogFileRead(segno, emode, tli,
4254 : XLOG_FROM_ARCHIVE, true);
417 heikki.linnakangas 4255 GIC 359 : if (fd != -1)
4256 : {
4257 24 : elog(DEBUG1, "got WAL segment from archive");
4258 24 : if (!expectedTLEs)
4259 12 : expectedTLEs = tles;
4260 1404 : return fd;
417 heikki.linnakangas 4261 EUB : }
4262 : }
4263 :
417 heikki.linnakangas 4264 GIC 1525 : if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_WAL)
417 heikki.linnakangas 4265 EUB : {
417 heikki.linnakangas 4266 GIC 1525 : fd = XLogFileRead(segno, emode, tli,
4267 : XLOG_FROM_PG_WAL, true);
417 heikki.linnakangas 4268 GBC 1525 : if (fd != -1)
4269 : {
4270 1380 : if (!expectedTLEs)
417 heikki.linnakangas 4271 GIC 1164 : expectedTLEs = tles;
4272 1380 : return fd;
4273 : }
4274 : }
4275 : }
4276 :
4277 : /* Couldn't find it. For simplicity, complain about front timeline */
4278 138 : XLogFilePath(path, recoveryTargetTLI, segno, wal_segment_size);
4279 138 : errno = ENOENT;
417 heikki.linnakangas 4280 CBC 138 : ereport(emode,
4281 : (errcode_for_file_access(),
4282 : errmsg("could not open file \"%s\": %m", path)));
417 heikki.linnakangas 4283 GIC 138 : return -1;
4284 : }
4285 :
4286 : /*
417 heikki.linnakangas 4287 ECB : * Set flag to signal the walreceiver to restart. (The startup process calls
4288 : * this on noticing a relevant configuration change.)
4289 : */
4290 : void
417 heikki.linnakangas 4291 LBC 0 : StartupRequestWalReceiverRestart(void)
417 heikki.linnakangas 4292 ECB : {
417 heikki.linnakangas 4293 UIC 0 : if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
417 heikki.linnakangas 4294 ECB : {
417 heikki.linnakangas 4295 UIC 0 : ereport(LOG,
4296 : (errmsg("WAL receiver process shutdown requested")));
4297 :
417 heikki.linnakangas 4298 LBC 0 : pendingWalRcvRestart = true;
4299 : }
4300 0 : }
417 heikki.linnakangas 4301 ECB :
4302 :
4303 : /*
4304 : * Has a standby promotion already been triggered?
4305 : *
4306 : * Unlike CheckForStandbyTrigger(), this works in any process
4307 : * that's connected to shared memory.
4308 : */
4309 : bool
417 heikki.linnakangas 4310 CBC 42 : PromoteIsTriggered(void)
4311 : {
417 heikki.linnakangas 4312 ECB : /*
4313 : * We check shared state each time only until a standby promotion is
4314 : * triggered. We can't trigger a promotion again, so there's no need to
4315 : * keep checking after the shared variable has once been seen true.
4316 : */
417 heikki.linnakangas 4317 GIC 42 : if (LocalPromoteIsTriggered)
4318 36 : return true;
417 heikki.linnakangas 4319 ECB :
417 heikki.linnakangas 4320 GIC 6 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 4321 CBC 6 : LocalPromoteIsTriggered = XLogRecoveryCtl->SharedPromoteIsTriggered;
4322 6 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4323 :
4324 6 : return LocalPromoteIsTriggered;
4325 : }
417 heikki.linnakangas 4326 ECB :
/*
 * Record that a standby promotion has been triggered.
 *
 * Sets SharedPromoteIsTriggered under info_lck, resets the recovery pause
 * state to 'not paused', and finally sets this process's local flag.
 */
static void
SetPromoteIsTriggered(void)
{
	/* Publish the flag in shared memory first. */
	SpinLockAcquire(&XLogRecoveryCtl->info_lck);
	XLogRecoveryCtl->SharedPromoteIsTriggered = true;
	SpinLockRelease(&XLogRecoveryCtl->info_lck);

	/*
	 * Mark the recovery pause state as 'not paused' because the paused state
	 * ends and promotion continues if a promotion is triggered while recovery
	 * is paused. Otherwise pg_get_wal_replay_pause_state() can mistakenly
	 * return 'paused' while a promotion is ongoing.
	 */
	SetRecoveryPause(false);

	/* Cache the result locally so later checks can skip shared memory. */
	LocalPromoteIsTriggered = true;
}
4344 :
4345 : /*
4346 : * Check whether a promote request has arrived.
4347 : */
417 heikki.linnakangas 4348 ECB : static bool
417 heikki.linnakangas 4349 GIC 13259 : CheckForStandbyTrigger(void)
4350 : {
417 heikki.linnakangas 4351 CBC 13259 : if (LocalPromoteIsTriggered)
417 heikki.linnakangas 4352 GIC 48 : return true;
417 heikki.linnakangas 4353 ECB :
417 heikki.linnakangas 4354 GIC 13211 : if (IsPromoteSignaled() && CheckPromoteSignal())
4355 : {
4356 36 : ereport(LOG, (errmsg("received promote request")));
4357 36 : RemovePromoteSignalFiles();
4358 36 : ResetPromoteSignaled();
4359 36 : SetPromoteIsTriggered();
4360 36 : return true;
417 heikki.linnakangas 4361 ECB : }
4362 :
417 heikki.linnakangas 4363 GIC 13175 : return false;
4364 : }
4365 :
/*
 * Remove the files signaling a standby promotion request.
 *
 * The result of unlink() is not checked, so a missing file (or any other
 * failure) is silently ignored.
 */
void
RemovePromoteSignalFiles(void)
{
	unlink(PROMOTE_SIGNAL_FILE);
}
4374 :
417 heikki.linnakangas 4375 ECB : /*
4376 : * Check to see if a promote request has arrived.
4377 : */
4378 : bool
417 heikki.linnakangas 4379 GIC 607 : CheckPromoteSignal(void)
417 heikki.linnakangas 4380 ECB : {
4381 : struct stat stat_buf;
4382 :
417 heikki.linnakangas 4383 GIC 607 : if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
417 heikki.linnakangas 4384 CBC 72 : return true;
4385 :
417 heikki.linnakangas 4386 GIC 535 : return false;
4387 : }
4388 :
/*
 * Wake up startup process to replay newly arrived WAL, or to notice that
 * failover has been requested.
 *
 * Implemented by setting the recoveryWakeupLatch kept in XLogRecoveryCtl.
 */
void
WakeupRecovery(void)
{
	SetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
}
4398 :
/*
 * Schedule a walreceiver wakeup in the main recovery loop.
 *
 * This only raises the doRequestWalReceiverReply flag; nothing else happens
 * in this function.
 */
void
XLogRequestWalReceiverReply(void)
{
	doRequestWalReceiverReply = true;
}
4407 :
4408 : /*
4409 : * Is HotStandby active yet? This is only important in special backends
417 heikki.linnakangas 4410 ECB : * since normal backends won't ever be able to connect until this returns
4411 : * true. Postmaster knows this by way of signal, not via shared memory.
4412 : *
4413 : * Unlike testing standbyState, this works in any process that's connected to
4414 : * shared memory. (And note that standbyState alone doesn't tell the truth
4415 : * anyway.)
4416 : */
4417 : bool
417 heikki.linnakangas 4418 GIC 125 : HotStandbyActive(void)
4419 : {
4420 : /*
4421 : * We check shared state each time only until Hot Standby is active. We
4422 : * can't de-activate Hot Standby, so there's no need to keep checking
4423 : * after the shared variable has once been seen true.
4424 : */
4425 125 : if (LocalHotStandbyActive)
4426 20 : return true;
4427 : else
417 heikki.linnakangas 4428 ECB : {
4429 : /* spinlock is essential on machines with weak memory ordering! */
417 heikki.linnakangas 4430 GIC 105 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4431 105 : LocalHotStandbyActive = XLogRecoveryCtl->SharedHotStandbyActive;
4432 105 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
417 heikki.linnakangas 4433 ECB :
417 heikki.linnakangas 4434 CBC 105 : return LocalHotStandbyActive;
417 heikki.linnakangas 4435 ECB : }
4436 : }
4437 :
/*
 * Like HotStandbyActive(), but to be used only in WAL replay code,
 * where we don't need to ask any other process what the state is.
 *
 * Returns the process-local flag without touching shared memory.  The
 * assertion restricts callers to the startup process or a
 * non-postmaster environment.
 */
static bool
HotStandbyActiveInReplay(void)
{
	Assert(AmStartupProcess() || !IsPostmasterEnvironment);
	return LocalHotStandbyActive;
}
4448 :
4449 : /*
4450 : * Get latest redo apply position.
417 heikki.linnakangas 4451 ECB : *
4452 : * Exported to allow WALReceiver to read the pointer directly.
4453 : */
4454 : XLogRecPtr
417 heikki.linnakangas 4455 CBC 54667 : GetXLogReplayRecPtr(TimeLineID *replayTLI)
417 heikki.linnakangas 4456 ECB : {
4457 : XLogRecPtr recptr;
4458 : TimeLineID tli;
4459 :
417 heikki.linnakangas 4460 GIC 54667 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4461 54667 : recptr = XLogRecoveryCtl->lastReplayedEndRecPtr;
417 heikki.linnakangas 4462 CBC 54667 : tli = XLogRecoveryCtl->lastReplayedTLI;
417 heikki.linnakangas 4463 GIC 54667 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4464 :
4465 54667 : if (replayTLI)
417 heikki.linnakangas 4466 CBC 1779 : *replayTLI = tli;
4467 54667 : return recptr;
417 heikki.linnakangas 4468 ECB : }
4469 :
4470 :
4471 : /*
4472 : * Get position of last applied, or the record being applied.
4473 : *
4474 : * This is different from GetXLogReplayRecPtr() in that if a WAL
4475 : * record is currently being applied, this includes that record.
4476 : */
4477 : XLogRecPtr
417 heikki.linnakangas 4478 GIC 6048 : GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)
4479 : {
417 heikki.linnakangas 4480 ECB : XLogRecPtr recptr;
4481 : TimeLineID tli;
4482 :
417 heikki.linnakangas 4483 CBC 6048 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4484 6048 : recptr = XLogRecoveryCtl->replayEndRecPtr;
4485 6048 : tli = XLogRecoveryCtl->replayEndTLI;
417 heikki.linnakangas 4486 GIC 6048 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4487 :
4488 6048 : if (replayEndTLI)
4489 6048 : *replayEndTLI = tli;
4490 6048 : return recptr;
4491 : }
417 heikki.linnakangas 4492 ECB :
/*
 * Save timestamp of latest processed commit/abort record.
 *
 * We keep this in XLogRecoveryCtl, not a simple static variable, so that it can be
 * seen by processes other than the startup process.  Note in particular
 * that CreateRestartPoint is executed in the checkpointer.
 *
 * The store into recoveryLastXTime is done while holding info_lck.
 */
static void
SetLatestXTime(TimestampTz xtime)
{
	SpinLockAcquire(&XLogRecoveryCtl->info_lck);
	XLogRecoveryCtl->recoveryLastXTime = xtime;
	SpinLockRelease(&XLogRecoveryCtl->info_lck);
}
4507 :
417 heikki.linnakangas 4508 ECB : /*
4509 : * Fetch timestamp of latest processed commit/abort record.
4510 : */
4511 : TimestampTz
417 heikki.linnakangas 4512 GIC 136 : GetLatestXTime(void)
4513 : {
417 heikki.linnakangas 4514 ECB : TimestampTz xtime;
4515 :
417 heikki.linnakangas 4516 CBC 136 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4517 136 : xtime = XLogRecoveryCtl->recoveryLastXTime;
4518 136 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4519 :
417 heikki.linnakangas 4520 GIC 136 : return xtime;
4521 : }
4522 :
/*
 * Save timestamp of the next chunk of WAL records to apply.
 *
 * We keep this in XLogRecoveryCtl, not a simple static variable, so that it can be
 * seen by all backends.
 *
 * The store into currentChunkStartTime is done while holding info_lck.
 */
static void
SetCurrentChunkStartTime(TimestampTz xtime)
{
	SpinLockAcquire(&XLogRecoveryCtl->info_lck);
	XLogRecoveryCtl->currentChunkStartTime = xtime;
	SpinLockRelease(&XLogRecoveryCtl->info_lck);
}
4536 :
4537 : /*
4538 : * Fetch timestamp of latest processed commit/abort record.
4539 : * Startup process maintains an accurate local copy in XLogReceiptTime
4540 : */
417 heikki.linnakangas 4541 EUB : TimestampTz
417 heikki.linnakangas 4542 GIC 606 : GetCurrentChunkReplayStartTime(void)
417 heikki.linnakangas 4543 EUB : {
4544 : TimestampTz xtime;
4545 :
417 heikki.linnakangas 4546 GIC 606 : SpinLockAcquire(&XLogRecoveryCtl->info_lck);
4547 606 : xtime = XLogRecoveryCtl->currentChunkStartTime;
417 heikki.linnakangas 4548 GBC 606 : SpinLockRelease(&XLogRecoveryCtl->info_lck);
4549 :
4550 606 : return xtime;
4551 : }
417 heikki.linnakangas 4552 EUB :
4553 : /*
4554 : * Returns time of receipt of current chunk of XLOG data, as well as
4555 : * whether it was received from streaming replication or from archives.
4556 : */
4557 : void
417 heikki.linnakangas 4558 GIC 29 : GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
4559 : {
4560 : /*
4561 : * This must be executed in the startup process, since we don't export the
4562 : * relevant state to shared memory.
4563 : */
4564 29 : Assert(InRecovery);
4565 :
4566 29 : *rtime = XLogReceiptTime;
4567 29 : *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
417 heikki.linnakangas 4568 GBC 29 : }
4569 :
/*
 * Check that an integer parameter is at least minValue; if it is not,
 * recovery cannot continue.
 *
 * When currValue >= minValue this returns without doing anything.
 * Otherwise, if HotStandbyActiveInReplay() is true, a WARNING is logged,
 * recovery is paused, and we wait until the pause state returns to
 * RECOVERY_NOT_PAUSED.  In every failing case the function ends by raising
 * FATAL.
 *
 * The messages describe minValue as the value on the primary server.
 *
 * Note that text field supplied is a parameter name and does not require
 * translation
 */
void
RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)
{
	if (currValue < minValue)
	{
		if (HotStandbyActiveInReplay())
		{
			/* Ensures the promotion warning below is emitted only once. */
			bool		warned_for_promote = false;

			ereport(WARNING,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("hot standby is not possible because of insufficient parameter settings"),
					 errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
							   param_name,
							   currValue,
							   minValue)));

			/* Request a pause before announcing it in the log. */
			SetRecoveryPause(true);

			ereport(LOG,
					(errmsg("recovery has paused"),
					 errdetail("If recovery is unpaused, the server will shut down."),
					 errhint("You can then restart the server after making the necessary configuration changes.")));

			/* Stay here until the pause state goes back to "not paused". */
			while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
			{
				HandleStartupProcInterrupts();

				if (CheckForStandbyTrigger())
				{
					if (!warned_for_promote)
						ereport(WARNING,
								(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
								 errmsg("promotion is not possible because of insufficient parameter settings"),

						/*
						 * Repeat the detail from above so it's easy to find
						 * in the log.
						 */
								 errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
										   param_name,
										   currValue,
										   minValue),
								 errhint("Restart the server after making the necessary configuration changes.")));
					warned_for_promote = true;
				}

				/*
				 * If recovery pause is requested then set it paused.  While
				 * we are in the loop, user might resume and pause again so
				 * set this every time.
				 */
				ConfirmRecoveryPaused();

				/*
				 * We wait on a condition variable that will wake us as soon
				 * as the pause ends, but we use a timeout so we can check the
				 * above conditions periodically too.
				 */
				ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
											WAIT_EVENT_RECOVERY_PAUSE);
			}
			ConditionVariableCancelSleep();
		}

		/* Reached whether or not hot standby was active. */
		ereport(FATAL,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("recovery aborted because of insufficient parameter settings"),
		/* Repeat the detail from above so it's easy to find in the log. */
				 errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
						   param_name,
						   currValue,
						   minValue),
				 errhint("You can restart the server after making the necessary configuration changes.")));
	}
}
4650 :
4651 :
4652 : /*
4653 : * GUC check_hook for primary_slot_name
4654 : */
4655 : bool
208 tgl 4656 GNC 1944 : check_primary_slot_name(char **newval, void **extra, GucSource source)
4657 : {
4658 1944 : if (*newval && strcmp(*newval, "") != 0 &&
4659 86 : !ReplicationSlotValidateName(*newval, WARNING))
208 tgl 4660 UNC 0 : return false;
4661 :
208 tgl 4662 GNC 1944 : return true;
4663 : }
4664 :
4665 : /*
4666 : * Recovery target settings: Only one of the several recovery_target* settings
4667 : * may be set. Setting a second one results in an error. The global variable
4668 : * recoveryTarget tracks which kind of recovery target was chosen. Other
4669 : * variables store the actual target value (for example a string or a xid).
4670 : * The assign functions of the parameters check whether a competing parameter
4671 : * was already set. But we want to allow setting the same parameter multiple
4672 : * times. We also want to allow unsetting a parameter and setting a different
4673 : * one, so we unset recoveryTarget when the parameter is set to an empty
4674 : * string.
4675 : *
4676 : * XXX this code is broken by design. Throwing an error from a GUC assign
4677 : * hook breaks fundamental assumptions of guc.c. So long as all the variables
4678 : * for which this can happen are PGC_POSTMASTER, the consequences are limited,
4679 : * since we'd just abort postmaster startup anyway. Nonetheless it's likely
4680 : * that we have odd behaviors such as unexpected GUC ordering dependencies.
4681 : */
4682 :
/*
 * Raise the common ERROR used by the recovery_target* assign hooks when a
 * second, competing recovery target parameter is set.  Declared noreturn.
 */
static void
pg_attribute_noreturn()
error_multiple_recovery_targets(void)
{
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("multiple recovery targets specified"),
			 errdetail("At most one of recovery_target, recovery_target_lsn, recovery_target_name, recovery_target_time, recovery_target_xid may be set.")));
}
4692 :
4693 : /*
4694 : * GUC check_hook for recovery_target
4695 : */
4696 : bool
4697 1858 : check_recovery_target(char **newval, void **extra, GucSource source)
4698 : {
4699 1858 : if (strcmp(*newval, "immediate") != 0 && strcmp(*newval, "") != 0)
4700 : {
208 tgl 4701 UNC 0 : GUC_check_errdetail("The only allowed value is \"immediate\".");
4702 0 : return false;
4703 : }
208 tgl 4704 GNC 1858 : return true;
4705 : }
4706 :
4707 : /*
4708 : * GUC assign_hook for recovery_target
4709 : */
4710 : void
4711 1858 : assign_recovery_target(const char *newval, void *extra)
4712 : {
4713 1858 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
208 tgl 4714 UNC 0 : recoveryTarget != RECOVERY_TARGET_IMMEDIATE)
4715 0 : error_multiple_recovery_targets();
4716 :
208 tgl 4717 GNC 1858 : if (newval && strcmp(newval, "") != 0)
4718 1 : recoveryTarget = RECOVERY_TARGET_IMMEDIATE;
4719 : else
4720 1857 : recoveryTarget = RECOVERY_TARGET_UNSET;
4721 1858 : }
4722 :
4723 : /*
4724 : * GUC check_hook for recovery_target_lsn
4725 : */
4726 : bool
4727 1858 : check_recovery_target_lsn(char **newval, void **extra, GucSource source)
4728 : {
4729 1858 : if (strcmp(*newval, "") != 0)
4730 : {
4731 : XLogRecPtr lsn;
4732 : XLogRecPtr *myextra;
4733 1 : bool have_error = false;
4734 :
4735 1 : lsn = pg_lsn_in_internal(*newval, &have_error);
4736 1 : if (have_error)
208 tgl 4737 UNC 0 : return false;
4738 :
208 tgl 4739 GNC 1 : myextra = (XLogRecPtr *) guc_malloc(ERROR, sizeof(XLogRecPtr));
4740 1 : *myextra = lsn;
4741 1 : *extra = (void *) myextra;
4742 : }
4743 1858 : return true;
4744 : }
4745 :
4746 : /*
4747 : * GUC assign_hook for recovery_target_lsn
4748 : */
4749 : void
4750 1858 : assign_recovery_target_lsn(const char *newval, void *extra)
4751 : {
4752 1858 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
208 tgl 4753 UNC 0 : recoveryTarget != RECOVERY_TARGET_LSN)
4754 0 : error_multiple_recovery_targets();
4755 :
208 tgl 4756 GNC 1858 : if (newval && strcmp(newval, "") != 0)
4757 : {
4758 1 : recoveryTarget = RECOVERY_TARGET_LSN;
4759 1 : recoveryTargetLSN = *((XLogRecPtr *) extra);
4760 : }
4761 : else
4762 1857 : recoveryTarget = RECOVERY_TARGET_UNSET;
4763 1858 : }
4764 :
4765 : /*
4766 : * GUC check_hook for recovery_target_name
4767 : */
4768 : bool
4769 1864 : check_recovery_target_name(char **newval, void **extra, GucSource source)
4770 : {
4771 : /* Use the value of newval directly */
4772 1864 : if (strlen(*newval) >= MAXFNAMELEN)
4773 : {
208 tgl 4774 UNC 0 : GUC_check_errdetail("%s is too long (maximum %d characters).",
4775 : "recovery_target_name", MAXFNAMELEN - 1);
4776 0 : return false;
4777 : }
208 tgl 4778 GNC 1864 : return true;
4779 : }
4780 :
4781 : /*
4782 : * GUC assign_hook for recovery_target_name
4783 : */
4784 : void
4785 1864 : assign_recovery_target_name(const char *newval, void *extra)
4786 : {
4787 1864 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
208 tgl 4788 UNC 0 : recoveryTarget != RECOVERY_TARGET_NAME)
4789 0 : error_multiple_recovery_targets();
4790 :
208 tgl 4791 GNC 1864 : if (newval && strcmp(newval, "") != 0)
4792 : {
4793 6 : recoveryTarget = RECOVERY_TARGET_NAME;
4794 6 : recoveryTargetName = newval;
4795 : }
4796 : else
4797 1858 : recoveryTarget = RECOVERY_TARGET_UNSET;
4798 1864 : }
4799 :
4800 : /*
4801 : * GUC check_hook for recovery_target_time
4802 : *
4803 : * The interpretation of the recovery_target_time string can depend on the
4804 : * time zone setting, so we need to wait until after all GUC processing is
4805 : * done before we can do the final parsing of the string. This check function
4806 : * only does a parsing pass to catch syntax errors, but we store the string
4807 : * and parse it again when we need to use it.
4808 : */
4809 : bool
4810 1860 : check_recovery_target_time(char **newval, void **extra, GucSource source)
4811 : {
4812 1860 : if (strcmp(*newval, "") != 0)
4813 : {
4814 : /* reject some special values */
4815 3 : if (strcmp(*newval, "now") == 0 ||
4816 3 : strcmp(*newval, "today") == 0 ||
4817 3 : strcmp(*newval, "tomorrow") == 0 ||
4818 3 : strcmp(*newval, "yesterday") == 0)
4819 : {
208 tgl 4820 UNC 0 : return false;
4821 : }
4822 :
4823 : /*
4824 : * parse timestamp value (see also timestamptz_in())
4825 : */
4826 : {
208 tgl 4827 GNC 3 : char *str = *newval;
4828 : fsec_t fsec;
4829 : struct pg_tm tt,
4830 3 : *tm = &tt;
4831 : int tz;
4832 : int dtype;
4833 : int nf;
4834 : int dterr;
4835 : char *field[MAXDATEFIELDS];
4836 : int ftype[MAXDATEFIELDS];
4837 : char workbuf[MAXDATELEN + MAXDATEFIELDS];
4838 : DateTimeErrorExtra dtextra;
4839 : TimestampTz timestamp;
4840 :
4841 3 : dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
4842 : field, ftype, MAXDATEFIELDS, &nf);
4843 3 : if (dterr == 0)
121 4844 3 : dterr = DecodeDateTime(field, ftype, nf,
4845 : &dtype, tm, &fsec, &tz, &dtextra);
208 4846 3 : if (dterr != 0)
208 tgl 4847 UNC 0 : return false;
208 tgl 4848 GNC 3 : if (dtype != DTK_DATE)
208 tgl 4849 UNC 0 : return false;
4850 :
208 tgl 4851 GNC 3 : if (tm2timestamp(tm, fsec, &tz, ×tamp) != 0)
4852 : {
208 tgl 4853 UNC 0 : GUC_check_errdetail("timestamp out of range: \"%s\"", str);
4854 0 : return false;
4855 : }
4856 : }
4857 : }
208 tgl 4858 GNC 1860 : return true;
4859 : }
4860 :
4861 : /*
4862 : * GUC assign_hook for recovery_target_time
4863 : */
4864 : void
4865 1860 : assign_recovery_target_time(const char *newval, void *extra)
4866 : {
4867 1860 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
4868 1 : recoveryTarget != RECOVERY_TARGET_TIME)
4869 1 : error_multiple_recovery_targets();
4870 :
4871 1859 : if (newval && strcmp(newval, "") != 0)
4872 2 : recoveryTarget = RECOVERY_TARGET_TIME;
4873 : else
4874 1857 : recoveryTarget = RECOVERY_TARGET_UNSET;
4875 1859 : }
4876 :
4877 : /*
4878 : * GUC check_hook for recovery_target_timeline
4879 : */
4880 : bool
4881 1858 : check_recovery_target_timeline(char **newval, void **extra, GucSource source)
4882 : {
4883 : RecoveryTargetTimeLineGoal rttg;
4884 : RecoveryTargetTimeLineGoal *myextra;
4885 :
4886 1858 : if (strcmp(*newval, "current") == 0)
208 tgl 4887 UNC 0 : rttg = RECOVERY_TARGET_TIMELINE_CONTROLFILE;
208 tgl 4888 GNC 1858 : else if (strcmp(*newval, "latest") == 0)
4889 1858 : rttg = RECOVERY_TARGET_TIMELINE_LATEST;
4890 : else
4891 : {
208 tgl 4892 UNC 0 : rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
4893 :
4894 0 : errno = 0;
4895 0 : strtoul(*newval, NULL, 0);
4896 0 : if (errno == EINVAL || errno == ERANGE)
4897 : {
4898 0 : GUC_check_errdetail("recovery_target_timeline is not a valid number.");
4899 0 : return false;
4900 : }
4901 : }
4902 :
208 tgl 4903 GNC 1858 : myextra = (RecoveryTargetTimeLineGoal *) guc_malloc(ERROR, sizeof(RecoveryTargetTimeLineGoal));
4904 1858 : *myextra = rttg;
4905 1858 : *extra = (void *) myextra;
4906 :
4907 1858 : return true;
4908 : }
4909 :
4910 : /*
4911 : * GUC assign_hook for recovery_target_timeline
4912 : */
4913 : void
4914 1858 : assign_recovery_target_timeline(const char *newval, void *extra)
4915 : {
4916 1858 : recoveryTargetTimeLineGoal = *((RecoveryTargetTimeLineGoal *) extra);
4917 1858 : if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_NUMERIC)
208 tgl 4918 UNC 0 : recoveryTargetTLIRequested = (TimeLineID) strtoul(newval, NULL, 0);
4919 : else
208 tgl 4920 GNC 1858 : recoveryTargetTLIRequested = 0;
4921 1858 : }
4922 :
4923 : /*
4924 : * GUC check_hook for recovery_target_xid
4925 : */
4926 : bool
4927 1858 : check_recovery_target_xid(char **newval, void **extra, GucSource source)
4928 : {
4929 1858 : if (strcmp(*newval, "") != 0)
4930 : {
4931 : TransactionId xid;
4932 : TransactionId *myextra;
4933 :
4934 1 : errno = 0;
4935 1 : xid = (TransactionId) strtou64(*newval, NULL, 0);
4936 1 : if (errno == EINVAL || errno == ERANGE)
208 tgl 4937 UNC 0 : return false;
4938 :
208 tgl 4939 GNC 1 : myextra = (TransactionId *) guc_malloc(ERROR, sizeof(TransactionId));
4940 1 : *myextra = xid;
4941 1 : *extra = (void *) myextra;
4942 : }
4943 1858 : return true;
4944 : }
4945 :
4946 : /*
4947 : * GUC assign_hook for recovery_target_xid
4948 : */
4949 : void
4950 1858 : assign_recovery_target_xid(const char *newval, void *extra)
4951 : {
4952 1858 : if (recoveryTarget != RECOVERY_TARGET_UNSET &&
208 tgl 4953 UNC 0 : recoveryTarget != RECOVERY_TARGET_XID)
4954 0 : error_multiple_recovery_targets();
4955 :
208 tgl 4956 GNC 1858 : if (newval && strcmp(newval, "") != 0)
4957 : {
4958 1 : recoveryTarget = RECOVERY_TARGET_XID;
4959 1 : recoveryTargetXid = *((TransactionId *) extra);
4960 : }
4961 : else
4962 1857 : recoveryTarget = RECOVERY_TARGET_UNSET;
4963 1858 : }
|