Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * walsummarizer.c
4 : : *
5 : : * Background process to perform WAL summarization, if it is enabled.
6 : : * It continuously scans the write-ahead log and periodically emits a
7 : : * summary file which indicates which blocks in which relation forks
8 : : * were modified by WAL records in the LSN range covered by the summary
9 : : * file. See walsummary.c and blkreftable.c for more details on the
10 : : * naming and contents of WAL summary files.
11 : : *
12 : : * If configured to do so, this background process will also remove WAL
13 : : * summary files when the file timestamp is older than a configurable
14 : : * threshold (but only if the WAL has been removed first).
15 : : *
16 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
17 : : *
18 : : * IDENTIFICATION
19 : : * src/backend/postmaster/walsummarizer.c
20 : : *
21 : : *-------------------------------------------------------------------------
22 : : */
23 : : #include "postgres.h"
24 : :
25 : : #include "access/timeline.h"
26 : : #include "access/xlog.h"
27 : : #include "access/xlog_internal.h"
28 : : #include "access/xlogrecovery.h"
29 : : #include "access/xlogutils.h"
30 : : #include "backup/walsummary.h"
31 : : #include "catalog/storage_xlog.h"
32 : : #include "commands/dbcommands_xlog.h"
33 : : #include "common/blkreftable.h"
34 : : #include "libpq/pqsignal.h"
35 : : #include "miscadmin.h"
36 : : #include "postmaster/auxprocess.h"
37 : : #include "postmaster/interrupt.h"
38 : : #include "postmaster/walsummarizer.h"
39 : : #include "replication/walreceiver.h"
40 : : #include "storage/fd.h"
41 : : #include "storage/ipc.h"
42 : : #include "storage/latch.h"
43 : : #include "storage/lwlock.h"
44 : : #include "storage/proc.h"
45 : : #include "storage/procsignal.h"
46 : : #include "storage/shmem.h"
47 : : #include "utils/guc.h"
48 : : #include "utils/memutils.h"
49 : : #include "utils/wait_event.h"
50 : :
51 : : /*
52 : : * Data in shared memory related to WAL summarization.
53 : : */
54 : : typedef struct
55 : : {
56 : : /*
57 : : * These fields are protected by WALSummarizerLock.
58 : : *
59 : : * Until we've discovered what summary files already exist on disk and
60 : : * stored that information in shared memory, initialized is false and the
61 : : * other fields here contain no meaningful information. After that has
62 : : * been done, initialized is true.
63 : : *
64 : : * summarized_tli and summarized_lsn indicate the TLI and LSN at
65 : : * which the next summary file will start. Normally, these are the LSN and
66 : : * TLI at which the last file ended; in such a case, lsn_is_exact is true.
67 : : * If, however, the LSN is just an approximation, then lsn_is_exact is
68 : : * false. This can happen if, for example, there are no existing WAL
69 : : * summary files at startup. In that case, we have to derive the position
70 : : * at which to start summarizing from the WAL files that exist on disk,
71 : : * and so the LSN might point to the start of the next file even though
72 : : * that might happen to be in the middle of a WAL record.
73 : : *
74 : : * summarizer_pgprocno is the proc number of the summarizer process, if
75 : : * one is running, or else INVALID_PROC_NUMBER.
76 : : *
77 : : * pending_lsn is used by the summarizer to advertise the ending LSN of a
78 : : * record it has recently read. It shouldn't ever be less than
79 : : * summarized_lsn, but might be greater, because the summarizer buffers
80 : : * data for a range of LSNs in memory before writing out a new file.
81 : : */
82 : : bool initialized;
83 : : TimeLineID summarized_tli;
84 : : XLogRecPtr summarized_lsn;
85 : : bool lsn_is_exact;
86 : : ProcNumber summarizer_pgprocno;
87 : : XLogRecPtr pending_lsn;
88 : :
89 : : /*
90 : : * This field handles its own synchronization. It is broadcast by the
91 : : * summarizer each time it advances summarized_lsn.
92 : : */
93 : : ConditionVariable summary_file_cv;
94 : : } WalSummarizerData;
94 : :
95 : : /*
96 : : * Private data for our xlogreader's page read callback.
97 : : */
98 : : typedef struct
99 : : {
100 : : TimeLineID tli; /* timeline being summarized; set by SummarizeWAL */
101 : : bool historic; /* true when SummarizeWAL was given a valid switch_lsn */
102 : : XLogRecPtr read_upto; /* initialized from SummarizeWAL's maximum_lsn */
103 : : bool end_of_wal; /* starts out false (palloc0); NOTE(review): presumably set by the page read callback on reaching end of WAL -- confirm */
104 : : } SummarizerReadLocalXLogPrivate;
105 : :
106 : : /* Pointer to shared memory state. */
107 : : static WalSummarizerData *WalSummarizerCtl;
108 : :
109 : : /*
110 : : * When we reach end of WAL and need to read more, we sleep for a number of
111 : : * milliseconds that is an integer multiple of MS_PER_SLEEP_QUANTUM. This is
112 : : * the multiplier. It should vary between 1 and MAX_SLEEP_QUANTA, depending
113 : : * on system activity. See summarizer_wait_for_wal() for how we adjust this.
114 : : */
115 : : static long sleep_quanta = 1;
116 : :
117 : : /*
118 : : * The sleep time will always be a multiple of 200ms and will not exceed
119 : : * thirty seconds (150 * 200 = 30 * 1000). Note that the timeout here needs
120 : : * to be substantially less than the maximum amount of time for which an
121 : : * incremental backup will wait for this process to catch up. Otherwise, an
122 : : * incremental backup might time out on an idle system just because we sleep
123 : : * for too long.
124 : : */
125 : : #define MAX_SLEEP_QUANTA 150
126 : : #define MS_PER_SLEEP_QUANTUM 200
127 : :
128 : : /*
129 : : * This is a count of the number of pages of WAL that we've read since the
130 : : * last time we waited for more WAL to appear.
131 : : */
132 : : static long pages_read_since_last_sleep = 0;
133 : :
134 : : /*
135 : : * Most recent RedoRecPtr value observed by MaybeRemoveOldWalSummaries.
136 : : */
137 : : static XLogRecPtr redo_pointer_at_last_summary_removal = InvalidXLogRecPtr;
138 : :
139 : : /*
140 : : * GUC parameters
141 : : */
142 : : bool summarize_wal = false;
143 : : int wal_summary_keep_time = 10 * HOURS_PER_DAY * MINS_PER_HOUR;
144 : :
145 : : static void WalSummarizerShutdown(int code, Datum arg);
146 : : static XLogRecPtr GetLatestLSN(TimeLineID *tli);
147 : : static void HandleWalSummarizerInterrupts(void);
148 : : static XLogRecPtr SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn,
149 : : bool exact, XLogRecPtr switch_lsn,
150 : : XLogRecPtr maximum_lsn);
151 : : static void SummarizeDbaseRecord(XLogReaderState *xlogreader,
152 : : BlockRefTable *brtab);
153 : : static void SummarizeSmgrRecord(XLogReaderState *xlogreader,
154 : : BlockRefTable *brtab);
155 : : static void SummarizeXactRecord(XLogReaderState *xlogreader,
156 : : BlockRefTable *brtab);
157 : : static bool SummarizeXlogRecord(XLogReaderState *xlogreader);
158 : : static int summarizer_read_local_xlog_page(XLogReaderState *state,
159 : : XLogRecPtr targetPagePtr,
160 : : int reqLen,
161 : : XLogRecPtr targetRecPtr,
162 : : char *cur_page);
163 : : static void summarizer_wait_for_wal(void);
164 : : static void MaybeRemoveOldWalSummaries(void);
165 : :
166 : : /*
167 : : * Amount of shared memory required for this module.
168 : : */
169 : : Size
116 rhaas@postgresql.org 170 :GNC 2577 : WalSummarizerShmemSize(void)
171 : : {
172 : 2577 : return sizeof(WalSummarizerData);
173 : : }
174 : :
175 : : /*
176 : : * Create or attach to shared memory segment for this module.
177 : : */
178 : : void
179 : 898 : WalSummarizerShmemInit(void)
180 : : {
181 : : bool found;
182 : :
183 : 898 : WalSummarizerCtl = (WalSummarizerData *)
184 : 898 : ShmemInitStruct("Wal Summarizer Ctl", WalSummarizerShmemSize(),
185 : : &found);
186 : :
187 [ + - ]: 898 : if (!found)
188 : : {
189 : : /*
190 : : * First time through, so initialize.
191 : : *
192 : : * We're just filling in dummy values here -- the real initialization
193 : : * will happen when GetOldestUnsummarizedLSN() is called for the first
194 : : * time.
195 : : */
196 : 898 : WalSummarizerCtl->initialized = false;
197 : 898 : WalSummarizerCtl->summarized_tli = 0;
198 : 898 : WalSummarizerCtl->summarized_lsn = InvalidXLogRecPtr;
199 : 898 : WalSummarizerCtl->lsn_is_exact = false;
42 heikki.linnakangas@i 200 : 898 : WalSummarizerCtl->summarizer_pgprocno = INVALID_PROC_NUMBER;
116 rhaas@postgresql.org 201 : 898 : WalSummarizerCtl->pending_lsn = InvalidXLogRecPtr;
202 : 898 : ConditionVariableInit(&WalSummarizerCtl->summary_file_cv);
203 : : }
204 : 898 : }
205 : :
206 : : /*
207 : : * Entry point for walsummarizer process.
208 : : */
209 : : void
27 heikki.linnakangas@i 210 : 12 : WalSummarizerMain(char *startup_data, size_t startup_data_len)
211 : : {
212 : : sigjmp_buf local_sigjmp_buf;
213 : : MemoryContext context;
214 : :
215 : : /*
216 : : * Within this function, 'current_lsn' and 'current_tli' refer to the
217 : : * point from which the next WAL summary file should start. 'exact' is
218 : : * true if 'current_lsn' is known to be the start of a WAL record or WAL
219 : : * segment, and false if it might be in the middle of a record someplace.
220 : : *
221 : : * 'switch_lsn' and 'switch_tli', if set, are the LSN at which we need to
222 : : * switch to a new timeline and the timeline to which we need to switch.
223 : : * If not set, we either haven't figured out the answers yet or we're
224 : : * already on the latest timeline.
225 : : */
226 : : XLogRecPtr current_lsn;
227 : : TimeLineID current_tli;
228 : : bool exact;
116 rhaas@postgresql.org 229 : 12 : XLogRecPtr switch_lsn = InvalidXLogRecPtr;
230 : 12 : TimeLineID switch_tli = 0;
231 : :
27 heikki.linnakangas@i 232 [ - + ]: 12 : Assert(startup_data_len == 0);
233 : :
234 : 12 : MyBackendType = B_WAL_SUMMARIZER;
235 : 12 : AuxiliaryProcessMainCommon();
236 : :
116 rhaas@postgresql.org 237 [ - + ]: 12 : ereport(DEBUG1,
238 : : (errmsg_internal("WAL summarizer started")));
239 : :
240 : : /*
241 : : * Properly accept or ignore signals the postmaster might send us
242 : : *
243 : : * We have no particular use for SIGINT at the moment, but seems
244 : : * reasonable to treat like SIGTERM.
245 : : */
246 : 12 : pqsignal(SIGHUP, SignalHandlerForConfigReload);
247 : 12 : pqsignal(SIGINT, SignalHandlerForShutdownRequest);
248 : 12 : pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
249 : : /* SIGQUIT handler was already set up by InitPostmasterChild */
250 : 12 : pqsignal(SIGALRM, SIG_IGN);
251 : 12 : pqsignal(SIGPIPE, SIG_IGN);
252 : 12 : pqsignal(SIGUSR1, procsignal_sigusr1_handler);
253 : 12 : pqsignal(SIGUSR2, SIG_IGN); /* not used */
254 : :
255 : : /* Advertise ourselves. */
94 256 : 12 : on_shmem_exit(WalSummarizerShutdown, (Datum) 0);
116 257 : 12 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
52 heikki.linnakangas@i 258 : 12 : WalSummarizerCtl->summarizer_pgprocno = MyProcNumber;
116 rhaas@postgresql.org 259 : 12 : LWLockRelease(WALSummarizerLock);
260 : :
261 : : /* Create and switch to a memory context that we can reset on error. */
262 : 12 : context = AllocSetContextCreate(TopMemoryContext,
263 : : "Wal Summarizer",
264 : : ALLOCSET_DEFAULT_SIZES);
265 : 12 : MemoryContextSwitchTo(context);
266 : :
267 : : /*
268 : : * Reset some signals that are accepted by postmaster but not here
269 : : */
270 : 12 : pqsignal(SIGCHLD, SIG_DFL);
271 : :
272 : : /*
273 : : * If an exception is encountered, processing resumes here.
274 : : */
275 [ - + ]: 12 : if (sigsetjmp(local_sigjmp_buf, 1) != 0)
276 : : {
277 : : /* Since not using PG_TRY, must reset error stack by hand */
116 rhaas@postgresql.org 278 :UNC 0 : error_context_stack = NULL;
279 : :
280 : : /* Prevent interrupts while cleaning up */
281 : 0 : HOLD_INTERRUPTS();
282 : :
283 : : /* Report the error to the server log */
284 : 0 : EmitErrorReport();
285 : :
286 : : /* Release resources we might have acquired. */
287 : 0 : LWLockReleaseAll();
288 : 0 : ConditionVariableCancelSleep();
289 : 0 : pgstat_report_wait_end();
290 : 0 : ReleaseAuxProcessResources(false);
291 : 0 : AtEOXact_Files(false);
292 : 0 : AtEOXact_HashTables(false);
293 : :
294 : : /*
295 : : * Now return to normal top-level context and clear ErrorContext for
296 : : * next time.
297 : : */
298 : 0 : MemoryContextSwitchTo(context);
299 : 0 : FlushErrorState();
300 : :
301 : : /* Flush any leaked data in the top-level context */
302 : 0 : MemoryContextReset(context);
303 : :
304 : : /* Now we can allow interrupts again */
305 [ # # ]: 0 : RESUME_INTERRUPTS();
306 : :
307 : : /*
308 : : * Sleep for 10 seconds before attempting to resume operations in
309 : : * order to avoid excessive logging.
310 : : *
311 : : * Many of the likely error conditions are things that will repeat
312 : : * every time. For example, if the WAL can't be read or the summary
313 : : * can't be written, only administrator action will cure the problem.
314 : : * So a really fast retry time doesn't seem to be especially
315 : : * beneficial, and it will clutter the logs.
316 : : */
317 : 0 : (void) WaitLatch(MyLatch,
318 : : WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
319 : : 10000,
320 : : WAIT_EVENT_WAL_SUMMARIZER_ERROR);
321 : : }
322 : :
323 : : /* We can now handle ereport(ERROR) */
116 rhaas@postgresql.org 324 :GNC 12 : PG_exception_stack = &local_sigjmp_buf;
325 : :
326 : : /*
327 : : * Unblock signals (they were blocked when the postmaster forked us)
328 : : */
329 : 12 : sigprocmask(SIG_SETMASK, &UnBlockSig, NULL);
330 : :
331 : : /*
332 : : * Fetch information about previous progress from shared memory, and ask
333 : : * GetOldestUnsummarizedLSN to reset pending_lsn to summarized_lsn. We
334 : : * might be recovering from an error, and if so, pending_lsn might have
335 : : * advanced past summarized_lsn, but any WAL we read previously has been
336 : : * lost and will need to be reread.
337 : : *
338 : : * If we discover that WAL summarization is not enabled, just exit.
339 : : */
340 : 12 : current_lsn = GetOldestUnsummarizedLSN(¤t_tli, &exact, true);
341 [ + - ]: 12 : if (XLogRecPtrIsInvalid(current_lsn))
116 rhaas@postgresql.org 342 :UNC 0 : proc_exit(0);
343 : :
344 : : /*
345 : : * Loop forever
346 : : */
347 : : for (;;)
116 rhaas@postgresql.org 348 :GNC 71 : {
349 : : XLogRecPtr latest_lsn;
350 : : TimeLineID latest_tli;
351 : : XLogRecPtr end_of_summary_lsn;
352 : :
353 : : /* Flush any leaked data in the top-level context */
354 : 83 : MemoryContextReset(context);
355 : :
356 : : /* Process any signals received recently. */
357 : 83 : HandleWalSummarizerInterrupts();
358 : :
359 : : /* If it's time to remove any old WAL summaries, do that now. */
360 : 83 : MaybeRemoveOldWalSummaries();
361 : :
362 : : /* Find the LSN and TLI up to which we can safely summarize. */
363 : 83 : latest_lsn = GetLatestLSN(&latest_tli);
364 : :
365 : : /*
366 : : * If we're summarizing a historic timeline and we haven't yet
367 : : * computed the point at which to switch to the next timeline, do that
368 : : * now.
369 : : *
370 : : * Note that if this is a standby, what was previously the current
371 : : * timeline could become historic at any time.
372 : : *
373 : : * We could try to make this more efficient by caching the results of
374 : : * readTimeLineHistory when latest_tli has not changed, but since we
375 : : * only have to do this once per timeline switch, we probably wouldn't
376 : : * save any significant amount of work in practice.
377 : : */
378 [ + + + + ]: 83 : if (current_tli != latest_tli && XLogRecPtrIsInvalid(switch_lsn))
379 : : {
380 : 2 : List *tles = readTimeLineHistory(latest_tli);
381 : :
382 : 2 : switch_lsn = tliSwitchPoint(current_tli, tles, &switch_tli);
383 [ - + ]: 2 : ereport(DEBUG1,
384 : : errmsg("switch point from TLI %u to TLI %u is at %X/%X",
385 : : current_tli, switch_tli, LSN_FORMAT_ARGS(switch_lsn)));
386 : : }
387 : :
388 : : /*
389 : : * If we've reached the switch LSN, we can't summarize anything else
390 : : * on this timeline. Switch to the next timeline and go around again.
391 : : */
392 [ + + + + ]: 83 : if (!XLogRecPtrIsInvalid(switch_lsn) && current_lsn >= switch_lsn)
393 : : {
394 : 2 : current_tli = switch_tli;
395 : 2 : switch_lsn = InvalidXLogRecPtr;
396 : 2 : switch_tli = 0;
397 : 2 : continue;
398 : : }
399 : :
400 : : /* Summarize WAL. */
401 : 81 : end_of_summary_lsn = SummarizeWAL(current_tli,
402 : : current_lsn, exact,
403 : : switch_lsn, latest_lsn);
404 [ - + ]: 69 : Assert(!XLogRecPtrIsInvalid(end_of_summary_lsn));
405 [ - + ]: 69 : Assert(end_of_summary_lsn >= current_lsn);
406 : :
407 : : /*
408 : : * Update state for next loop iteration.
409 : : *
410 : : * Next summary file should start from exactly where this one ended.
411 : : */
412 : 69 : current_lsn = end_of_summary_lsn;
413 : 69 : exact = true;
414 : :
415 : : /* Update state in shared memory. */
416 : 69 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
417 [ - + ]: 69 : Assert(WalSummarizerCtl->pending_lsn <= end_of_summary_lsn);
418 : 69 : WalSummarizerCtl->summarized_lsn = end_of_summary_lsn;
419 : 69 : WalSummarizerCtl->summarized_tli = current_tli;
420 : 69 : WalSummarizerCtl->lsn_is_exact = true;
421 : 69 : WalSummarizerCtl->pending_lsn = end_of_summary_lsn;
422 : 69 : LWLockRelease(WALSummarizerLock);
423 : :
424 : : /* Wake up anyone waiting for more summary files to be written. */
425 : 69 : ConditionVariableBroadcast(&WalSummarizerCtl->summary_file_cv);
426 : : }
427 : : }
428 : :
429 : : /*
430 : : * Get information about the state of the WAL summarizer.
431 : : */
432 : : void
94 rhaas@postgresql.org 433 :UNC 0 : GetWalSummarizerState(TimeLineID *summarized_tli, XLogRecPtr *summarized_lsn,
434 : : XLogRecPtr *pending_lsn, int *summarizer_pid)
435 : : {
436 : 0 : LWLockAcquire(WALSummarizerLock, LW_SHARED);
437 [ # # ]: 0 : if (!WalSummarizerCtl->initialized)
438 : : {
439 : : /*
440 : : * If initialized is false, the rest of the structure contents are
441 : : * undefined.
442 : : */
443 : 0 : *summarized_tli = 0;
444 : 0 : *summarized_lsn = InvalidXLogRecPtr;
445 : 0 : *pending_lsn = InvalidXLogRecPtr;
446 : 0 : *summarizer_pid = -1;
447 : : }
448 : : else
449 : : {
450 : 0 : int summarizer_pgprocno = WalSummarizerCtl->summarizer_pgprocno;
451 : :
452 : 0 : *summarized_tli = WalSummarizerCtl->summarized_tli;
453 : 0 : *summarized_lsn = WalSummarizerCtl->summarized_lsn;
42 heikki.linnakangas@i 454 [ # # ]: 0 : if (summarizer_pgprocno == INVALID_PROC_NUMBER)
455 : : {
456 : : /*
457 : : * If the summarizer has exited, the fact that it had processed
458 : : * beyond summarized_lsn is irrelevant now.
459 : : */
94 rhaas@postgresql.org 460 : 0 : *pending_lsn = WalSummarizerCtl->summarized_lsn;
461 : 0 : *summarizer_pid = -1;
462 : : }
463 : : else
464 : : {
465 : 0 : *pending_lsn = WalSummarizerCtl->pending_lsn;
466 : :
467 : : /*
468 : : * We're not fussed about inexact answers here, since they could
469 : : * become stale instantly, so we don't bother taking the lock, but
470 : : * make sure that invalid PID values are normalized to -1.
471 : : */
472 : 0 : *summarizer_pid = GetPGProcByNumber(summarizer_pgprocno)->pid;
473 [ # # ]: 0 : if (*summarizer_pid <= 0)
474 : 0 : *summarizer_pid = -1;
475 : : }
476 : : }
477 : 0 : LWLockRelease(WALSummarizerLock);
478 : 0 : }
479 : :
480 : : /*
481 : : * Get the oldest LSN in this server's timeline history that has not yet been
482 : : * summarized.
483 : : *
484 : : * If *tli != NULL, it will be set to the TLI for the LSN that is returned.
485 : : *
486 : : * If *lsn_is_exact != NULL, it will be set to true if the returned LSN is
487 : : * necessarily the start of a WAL record and false if it's just the beginning
488 : : * of a WAL segment.
489 : : *
490 : : * If reset_pending_lsn is true, resets the pending_lsn in shared memory to
491 : : * be equal to the summarized_lsn.
492 : : */
493 : : XLogRecPtr
116 rhaas@postgresql.org 494 :GNC 1578 : GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact,
495 : : bool reset_pending_lsn)
496 : : {
497 : : TimeLineID latest_tli;
498 : 1578 : LWLockMode mode = reset_pending_lsn ? LW_EXCLUSIVE : LW_SHARED;
499 : : int n;
500 : : List *tles;
109 501 : 1578 : XLogRecPtr unsummarized_lsn = InvalidXLogRecPtr;
116 502 : 1578 : TimeLineID unsummarized_tli = 0;
503 : 1578 : bool should_make_exact = false;
504 : : List *existing_summaries;
505 : : ListCell *lc;
506 : :
507 : : /* If not summarizing WAL, do nothing. */
508 [ + + ]: 1578 : if (!summarize_wal)
509 : 1540 : return InvalidXLogRecPtr;
510 : :
511 : : /*
512 : : * Unless we need to reset the pending_lsn, we initially acquire the lock
513 : : * in shared mode and try to fetch the required information. If we acquire
514 : : * in shared mode and find that the data structure hasn't been
515 : : * initialized, we reacquire the lock in exclusive mode so that we can
516 : : * initialize it. However, if someone else does that first before we get
517 : : * the lock, then we can just return the requested information after all.
518 : : */
519 : : while (1)
520 : : {
521 : 41 : LWLockAcquire(WALSummarizerLock, mode);
522 : :
523 [ + + ]: 41 : if (WalSummarizerCtl->initialized)
524 : : {
525 : 26 : unsummarized_lsn = WalSummarizerCtl->summarized_lsn;
526 [ + + ]: 26 : if (tli != NULL)
527 : 3 : *tli = WalSummarizerCtl->summarized_tli;
528 [ + + ]: 26 : if (lsn_is_exact != NULL)
529 : 3 : *lsn_is_exact = WalSummarizerCtl->lsn_is_exact;
530 [ + + ]: 26 : if (reset_pending_lsn)
531 : 3 : WalSummarizerCtl->pending_lsn =
532 : 3 : WalSummarizerCtl->summarized_lsn;
533 : 26 : LWLockRelease(WALSummarizerLock);
534 : 26 : return unsummarized_lsn;
535 : : }
536 : :
537 [ + + ]: 15 : if (mode == LW_EXCLUSIVE)
538 : 12 : break;
539 : :
540 : 3 : LWLockRelease(WALSummarizerLock);
541 : 3 : mode = LW_EXCLUSIVE;
542 : : }
543 : :
544 : : /*
545 : : * The data structure needs to be initialized, and we are the first to
546 : : * obtain the lock in exclusive mode, so it's our job to do that
547 : : * initialization.
548 : : *
549 : : * So, find the oldest timeline on which WAL still exists, and the
550 : : * earliest segment for which it exists.
551 : : */
552 : 12 : (void) GetLatestLSN(&latest_tli);
553 : 12 : tles = readTimeLineHistory(latest_tli);
554 [ + - ]: 12 : for (n = list_length(tles) - 1; n >= 0; --n)
555 : : {
556 : 12 : TimeLineHistoryEntry *tle = list_nth(tles, n);
557 : : XLogSegNo oldest_segno;
558 : :
559 : 12 : oldest_segno = XLogGetOldestSegno(tle->tli);
560 [ + - ]: 12 : if (oldest_segno != 0)
561 : : {
562 : : /* Compute oldest LSN that still exists on disk. */
563 : 12 : XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size,
564 : : unsummarized_lsn);
565 : :
566 : 12 : unsummarized_tli = tle->tli;
567 : 12 : break;
568 : : }
569 : : }
570 : :
571 : : /* It really should not be possible for us to find no WAL. */
572 [ - + ]: 12 : if (unsummarized_tli == 0)
116 rhaas@postgresql.org 573 [ # # ]:UNC 0 : ereport(ERROR,
574 : : errcode(ERRCODE_INTERNAL_ERROR),
575 : : errmsg_internal("no WAL found on timeline %u", latest_tli));
576 : :
577 : : /*
578 : : * Don't try to summarize anything older than the end LSN of the newest
579 : : * summary file that exists for this timeline.
580 : : */
581 : : existing_summaries =
116 rhaas@postgresql.org 582 :GNC 12 : GetWalSummaries(unsummarized_tli,
583 : : InvalidXLogRecPtr, InvalidXLogRecPtr);
584 [ - + - - : 12 : foreach(lc, existing_summaries)
- + ]
585 : : {
116 rhaas@postgresql.org 586 :UNC 0 : WalSummaryFile *ws = lfirst(lc);
587 : :
588 [ # # ]: 0 : if (ws->end_lsn > unsummarized_lsn)
589 : : {
590 : 0 : unsummarized_lsn = ws->end_lsn;
591 : 0 : should_make_exact = true;
592 : : }
593 : : }
594 : :
595 : : /* Update shared memory with the discovered values. */
116 rhaas@postgresql.org 596 :GNC 12 : WalSummarizerCtl->initialized = true;
597 : 12 : WalSummarizerCtl->summarized_lsn = unsummarized_lsn;
598 : 12 : WalSummarizerCtl->summarized_tli = unsummarized_tli;
599 : 12 : WalSummarizerCtl->lsn_is_exact = should_make_exact;
600 : 12 : WalSummarizerCtl->pending_lsn = unsummarized_lsn;
601 : :
602 : : /* Also return the to the caller as required. */
603 [ + + ]: 12 : if (tli != NULL)
604 : 9 : *tli = WalSummarizerCtl->summarized_tli;
605 [ + + ]: 12 : if (lsn_is_exact != NULL)
606 : 9 : *lsn_is_exact = WalSummarizerCtl->lsn_is_exact;
607 : 12 : LWLockRelease(WALSummarizerLock);
608 : :
609 : 12 : return unsummarized_lsn;
610 : : }
611 : :
612 : : /*
613 : : * Attempt to set the WAL summarizer's latch.
614 : : *
615 : : * This might not work, because there's no guarantee that the WAL summarizer
616 : : * process was successfully started, and it also might have started but
617 : : * subsequently terminated. So, under normal circumstances, this will get the
618 : : * latch set, but there's no guarantee.
619 : : */
620 : : void
621 : 1100 : SetWalSummarizerLatch(void)
622 : : {
623 : : ProcNumber pgprocno;
624 : :
625 [ - + ]: 1100 : if (WalSummarizerCtl == NULL)
116 rhaas@postgresql.org 626 :UNC 0 : return;
627 : :
116 rhaas@postgresql.org 628 :GNC 1100 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
629 : 1100 : pgprocno = WalSummarizerCtl->summarizer_pgprocno;
630 : 1100 : LWLockRelease(WALSummarizerLock);
631 : :
42 heikki.linnakangas@i 632 [ + + ]: 1100 : if (pgprocno != INVALID_PROC_NUMBER)
116 rhaas@postgresql.org 633 : 22 : SetLatch(&ProcGlobal->allProcs[pgprocno].procLatch);
634 : : }
635 : :
636 : : /*
637 : : * Wait until WAL summarization reaches the given LSN, but not longer than
638 : : * the given timeout.
639 : : *
640 : : * The return value is the first still-unsummarized LSN. If it's greater than
641 : : * or equal to the passed LSN, then that LSN was reached. If not, we timed out.
642 : : *
643 : : * Either way, *pending_lsn is set to the value taken from WalSummarizerCtl.
644 : : */
645 : : XLogRecPtr
646 : 7 : WaitForWalSummarization(XLogRecPtr lsn, long timeout, XLogRecPtr *pending_lsn)
647 : : {
648 : 7 : TimestampTz start_time = GetCurrentTimestamp();
649 : 7 : TimestampTz deadline = TimestampTzPlusMilliseconds(start_time, timeout);
650 : : XLogRecPtr summarized_lsn;
651 : :
652 [ - + ]: 7 : Assert(!XLogRecPtrIsInvalid(lsn));
653 [ + - ]: 7 : Assert(timeout > 0);
654 : :
655 : : while (1)
656 : 12 : {
657 : : TimestampTz now;
658 : : long remaining_timeout;
659 : :
660 : : /*
661 : : * If the LSN summarized on disk has reached the target value, stop.
662 : : */
663 : 19 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
664 : 19 : summarized_lsn = WalSummarizerCtl->summarized_lsn;
665 : 19 : *pending_lsn = WalSummarizerCtl->pending_lsn;
666 : 19 : LWLockRelease(WALSummarizerLock);
667 [ + + ]: 19 : if (summarized_lsn >= lsn)
668 : 7 : break;
669 : :
670 : : /* Timeout reached? If yes, stop. */
671 : 12 : now = GetCurrentTimestamp();
672 : 12 : remaining_timeout = TimestampDifferenceMilliseconds(now, deadline);
673 [ - + ]: 12 : if (remaining_timeout <= 0)
116 rhaas@postgresql.org 674 :UNC 0 : break;
675 : :
676 : : /* Wait and see. */
116 rhaas@postgresql.org 677 :GNC 12 : ConditionVariableTimedSleep(&WalSummarizerCtl->summary_file_cv,
678 : : remaining_timeout,
679 : : WAIT_EVENT_WAL_SUMMARY_READY);
680 : : }
681 : :
682 : 7 : return summarized_lsn;
683 : : }
684 : :
685 : : /*
686 : : * On exit, update shared memory to make it clear that we're no longer
687 : : * running.
688 : : */
689 : : static void
94 rhaas@postgresql.org 690 :UNC 0 : WalSummarizerShutdown(int code, Datum arg)
691 : : {
692 : 0 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
42 heikki.linnakangas@i 693 : 0 : WalSummarizerCtl->summarizer_pgprocno = INVALID_PROC_NUMBER;
94 rhaas@postgresql.org 694 : 0 : LWLockRelease(WALSummarizerLock);
695 : 0 : }
696 : :
697 : : /*
698 : : * Get the latest LSN that is eligible to be summarized, and set *tli to the
699 : : * corresponding timeline.
700 : : */
701 : : static XLogRecPtr
116 rhaas@postgresql.org 702 :GNC 210 : GetLatestLSN(TimeLineID *tli)
703 : : {
704 [ + + ]: 210 : if (!RecoveryInProgress())
705 : : {
706 : : /* Don't summarize WAL before it's flushed. */
707 : 196 : return GetFlushRecPtr(tli);
708 : : }
709 : : else
710 : : {
711 : : XLogRecPtr flush_lsn;
712 : : TimeLineID flush_tli;
713 : : XLogRecPtr replay_lsn;
714 : : TimeLineID replay_tli;
715 : :
716 : : /*
717 : : * What we really want to know is how much WAL has been flushed to
718 : : * disk, but the only flush position available is the one provided by
719 : : * the walreceiver, which may not be running, because this could be
720 : : * crash recovery or recovery via restore_command. So use either the
721 : : * WAL receiver's flush position or the replay position, whichever is
722 : : * further ahead, on the theory that if the WAL has been replayed then
723 : : * it must also have been flushed to disk.
724 : : */
725 : 14 : flush_lsn = GetWalRcvFlushRecPtr(NULL, &flush_tli);
726 : 14 : replay_lsn = GetXLogReplayRecPtr(&replay_tli);
727 [ - + ]: 14 : if (flush_lsn > replay_lsn)
728 : : {
116 rhaas@postgresql.org 729 :UNC 0 : *tli = flush_tli;
730 : 0 : return flush_lsn;
731 : : }
732 : : else
733 : : {
116 rhaas@postgresql.org 734 :GNC 14 : *tli = replay_tli;
735 : 14 : return replay_lsn;
736 : : }
737 : : }
738 : : }
739 : :
740 : : /*
741 : : * Interrupt handler for main loop of WAL summarizer process.
742 : : */
743 : : static void
744 : 201219 : HandleWalSummarizerInterrupts(void)
745 : : {
746 [ + + ]: 201219 : if (ProcSignalBarrierPending)
747 : 3 : ProcessProcSignalBarrier();
748 : :
749 [ - + ]: 201219 : if (ConfigReloadPending)
750 : : {
116 rhaas@postgresql.org 751 :UNC 0 : ConfigReloadPending = false;
752 : 0 : ProcessConfigFile(PGC_SIGHUP);
753 : : }
754 : :
116 rhaas@postgresql.org 755 [ + - - + ]:GNC 201219 : if (ShutdownRequestPending || !summarize_wal)
756 : : {
116 rhaas@postgresql.org 757 [ # # ]:UNC 0 : ereport(DEBUG1,
758 : : errmsg_internal("WAL summarizer shutting down"));
759 : 0 : proc_exit(0);
760 : : }
761 : :
762 : : /* Perform logging of memory contexts of this process */
116 rhaas@postgresql.org 763 [ - + ]:GNC 201219 : if (LogMemoryContextPending)
116 rhaas@postgresql.org 764 :UNC 0 : ProcessLogMemoryContextInterrupt();
116 rhaas@postgresql.org 765 :GNC 201219 : }
766 : :
767 : : /*
768 : : * Summarize a range of WAL records on a single timeline.
769 : : *
770 : : * 'tli' is the timeline to be summarized.
771 : : *
772 : : * 'start_lsn' is the point at which we should start summarizing. If this
773 : : * value comes from the end LSN of the previous record as returned by the
774 : : * xlogreader machinery, 'exact' should be true; otherwise, 'exact' should
775 : : * be false, and this function will search forward for the start of a valid
776 : : * WAL record.
777 : : *
778 : : * 'switch_lsn' is the point at which we should switch to a later timeline,
779 : : * if we're summarizing a historic timeline.
780 : : *
781 : : * 'maximum_lsn' identifies the point beyond which we can't count on being
782 : : * able to read any more WAL. It should be the switch point when reading a
783 : : * historic timeline, or the most-recently-measured end of WAL when reading
784 : : * the current timeline.
785 : : *
786 : : * The return value is the LSN at which the WAL summary actually ends. Most
787 : : * often, a summary file ends because we notice that a checkpoint has
788 : : * occurred and reach the redo pointer of that checkpoint, but sometimes
789 : : * we stop for other reasons, such as a timeline switch.
790 : : */
791 : : static XLogRecPtr
792 : 81 : SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
793 : : XLogRecPtr switch_lsn, XLogRecPtr maximum_lsn)
794 : : {
795 : : SummarizerReadLocalXLogPrivate *private_data;
796 : : XLogReaderState *xlogreader;
797 : : XLogRecPtr summary_start_lsn;
798 : 81 : XLogRecPtr summary_end_lsn = switch_lsn;
799 : : char temp_path[MAXPGPATH];
800 : : char final_path[MAXPGPATH];
801 : : WalSummaryIO io;
802 : 81 : BlockRefTable *brtab = CreateEmptyBlockRefTable();
803 : :
804 : : /* Initialize private data for xlogreader. */
805 : : private_data = (SummarizerReadLocalXLogPrivate *)
806 : 81 : palloc0(sizeof(SummarizerReadLocalXLogPrivate));
807 : 81 : private_data->tli = tli;
808 : 81 : private_data->historic = !XLogRecPtrIsInvalid(switch_lsn);
809 : 81 : private_data->read_upto = maximum_lsn;
810 : :
811 : : /* Create xlogreader. */
812 : 81 : xlogreader = XLogReaderAllocate(wal_segment_size, NULL,
813 : 81 : XL_ROUTINE(.page_read = &summarizer_read_local_xlog_page,
814 : : .segment_open = &wal_segment_open,
815 : : .segment_close = &wal_segment_close),
816 : : private_data);
817 [ - + ]: 81 : if (xlogreader == NULL)
116 rhaas@postgresql.org 818 [ # # ]:UNC 0 : ereport(ERROR,
819 : : (errcode(ERRCODE_OUT_OF_MEMORY),
820 : : errmsg("out of memory"),
821 : : errdetail("Failed while allocating a WAL reading processor.")));
822 : :
823 : : /*
824 : : * When exact = false, we're starting from an arbitrary point in the WAL
825 : : * and must search forward for the start of the next record.
826 : : *
827 : : * When exact = true, start_lsn should be either the LSN where a record
828 : : * begins, or the LSN of a page where the page header is immediately
829 : : * followed by the start of a new record. XLogBeginRead should tolerate
830 : : * either case.
831 : : *
832 : : * We need to allow for both cases because the behavior of xlogreader
833 : : * varies. When a record spans two or more xlog pages, the ending LSN
834 : : * reported by xlogreader will be the starting LSN of the following
835 : : * record, but when an xlog page boundary falls between two records, the
836 : : * end LSN for the first will be reported as the first byte of the
837 : : * following page. We can't know until we read that page how large the
838 : : * header will be, but we'll have to skip over it to find the next record.
839 : : */
116 rhaas@postgresql.org 840 [ + + ]:GNC 81 : if (exact)
841 : : {
842 : : /*
843 : : * Even if start_lsn is the beginning of a page rather than the
844 : : * beginning of the first record on that page, we should still use it
845 : : * as the start LSN for the summary file. That's because we detect
846 : : * missing summary files by looking for cases where the end LSN of one
847 : : * file is less than the start LSN of the next file. When only a page
848 : : * header is skipped, nothing has been missed.
849 : : */
850 : 69 : XLogBeginRead(xlogreader, start_lsn);
851 : 69 : summary_start_lsn = start_lsn;
852 : : }
853 : : else
854 : : {
855 : 12 : summary_start_lsn = XLogFindNextRecord(xlogreader, start_lsn);
856 [ - + ]: 12 : if (XLogRecPtrIsInvalid(summary_start_lsn))
857 : : {
858 : : /*
859 : : * If we hit end-of-WAL while trying to find the next valid
860 : : * record, we must be on a historic timeline that has no valid
861 : : * records that begin after start_lsn and before end of WAL.
862 : : */
116 rhaas@postgresql.org 863 [ # # ]:UNC 0 : if (private_data->end_of_wal)
864 : : {
865 [ # # ]: 0 : ereport(DEBUG1,
866 : : errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X",
867 : : tli,
868 : : LSN_FORMAT_ARGS(start_lsn),
869 : : LSN_FORMAT_ARGS(private_data->read_upto)));
870 : :
871 : : /*
872 : : * The timeline ends at or after start_lsn, without containing
873 : : * any records. Thus, we must make sure the main loop does not
874 : : * iterate. If start_lsn is the end of the timeline, then we
875 : : * won't actually emit an empty summary file, but otherwise,
876 : : * we must, to capture the fact that the LSN range in question
877 : : * contains no interesting WAL records.
878 : : */
879 : 0 : summary_start_lsn = start_lsn;
880 : 0 : summary_end_lsn = private_data->read_upto;
881 : 0 : switch_lsn = xlogreader->EndRecPtr;
882 : : }
883 : : else
884 [ # # ]: 0 : ereport(ERROR,
885 : : (errmsg("could not find a valid record after %X/%X",
886 : : LSN_FORMAT_ARGS(start_lsn))));
887 : : }
888 : :
889 : : /* We shouldn't go backward. */
116 rhaas@postgresql.org 890 [ + - ]:GNC 12 : Assert(summary_start_lsn >= start_lsn);
891 : : }
892 : :
893 : : /*
894 : : * Main loop: read xlog records one by one.
895 : : */
896 : : while (1)
897 : 193971 : {
898 : : int block_id;
899 : : char *errormsg;
900 : : XLogRecord *record;
901 : 194052 : bool stop_requested = false;
902 : :
903 : 194052 : HandleWalSummarizerInterrupts();
904 : :
905 : : /* We shouldn't go backward. */
906 [ - + ]: 194052 : Assert(summary_start_lsn <= xlogreader->EndRecPtr);
907 : :
908 : : /* Now read the next record. */
909 : 194052 : record = XLogReadRecord(xlogreader, &errormsg);
910 [ + + ]: 194040 : if (record == NULL)
911 : : {
912 [ + - ]: 1 : if (private_data->end_of_wal)
913 : : {
914 : : /*
915 : : * This timeline must be historic and must end before we were
916 : : * able to read a complete record.
917 : : */
918 [ - + ]: 1 : ereport(DEBUG1,
919 : : errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X",
920 : : tli,
921 : : LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
922 : : LSN_FORMAT_ARGS(private_data->read_upto)));
923 : : /* Summary ends at end of WAL. */
924 : 1 : summary_end_lsn = private_data->read_upto;
925 : 1 : break;
926 : : }
116 rhaas@postgresql.org 927 [ # # ]:UNC 0 : if (errormsg)
928 [ # # ]: 0 : ereport(ERROR,
929 : : (errcode_for_file_access(),
930 : : errmsg("could not read WAL from timeline %u at %X/%X: %s",
931 : : tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
932 : : errormsg)));
933 : : else
934 [ # # ]: 0 : ereport(ERROR,
935 : : (errcode_for_file_access(),
936 : : errmsg("could not read WAL from timeline %u at %X/%X",
937 : : tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr))));
938 : : }
939 : :
940 : : /* We shouldn't go backward. */
116 rhaas@postgresql.org 941 [ - + ]:GNC 194039 : Assert(summary_start_lsn <= xlogreader->EndRecPtr);
942 : :
943 [ + + ]: 194039 : if (!XLogRecPtrIsInvalid(switch_lsn) &&
944 [ - + ]: 6 : xlogreader->ReadRecPtr >= switch_lsn)
945 : : {
946 : : /*
947 : : * Whoops! We've read a record that *starts* after the switch LSN,
948 : : * contrary to our goal of reading only until we hit the first
949 : : * record that ends at or after the switch LSN. Pretend we didn't
950 : : * read it after all by bailing out of this loop right here,
951 : : * before we do anything with this record.
952 : : *
953 : : * This can happen because the last record before the switch LSN
954 : : * might be continued across multiple pages, and then we might
955 : : * come to a page with XLP_FIRST_IS_OVERWRITE_CONTRECORD set. In
956 : : * that case, the record that was continued across multiple pages
957 : : * is incomplete and will be disregarded, and the read will
958 : : * restart from the beginning of the page that is flagged
959 : : * XLP_FIRST_IS_OVERWRITE_CONTRECORD.
960 : : *
961 : : * If this case occurs, we can fairly say that the current summary
962 : : * file ends at the switch LSN exactly. The first record on the
963 : : * page marked XLP_FIRST_IS_OVERWRITE_CONTRECORD will be
964 : : * discovered when generating the next summary file.
965 : : */
116 rhaas@postgresql.org 966 :UNC 0 : summary_end_lsn = switch_lsn;
967 : 0 : break;
968 : : }
969 : :
970 : : /* Special handling for particular types of WAL records. */
116 rhaas@postgresql.org 971 [ + + + + :GNC 194039 : switch (XLogRecGetRmid(xlogreader))
+ ]
972 : : {
41 973 : 22 : case RM_DBASE_ID:
974 : 22 : SummarizeDbaseRecord(xlogreader, brtab);
975 : 22 : break;
116 976 : 1015 : case RM_SMGR_ID:
977 : 1015 : SummarizeSmgrRecord(xlogreader, brtab);
978 : 1015 : break;
979 : 5192 : case RM_XACT_ID:
980 : 5192 : SummarizeXactRecord(xlogreader, brtab);
981 : 5192 : break;
982 : 4373 : case RM_XLOG_ID:
983 : 4373 : stop_requested = SummarizeXlogRecord(xlogreader);
984 : 4373 : break;
985 : 183437 : default:
986 : 183437 : break;
987 : : }
988 : :
989 : : /*
990 : : * If we've been told that it's time to end this WAL summary file, do
991 : : * so. As an exception, if there's nothing included in this WAL
992 : : * summary file yet, then stopping doesn't make any sense, and we
993 : : * should wait until the next stop point instead.
994 : : */
995 [ + + + + ]: 194039 : if (stop_requested && xlogreader->ReadRecPtr > summary_start_lsn)
996 : : {
997 : 67 : summary_end_lsn = xlogreader->ReadRecPtr;
998 : 67 : break;
999 : : }
1000 : :
1001 : : /* Feed block references from xlog record to block reference table. */
1002 [ + + ]: 385517 : for (block_id = 0; block_id <= XLogRecMaxBlockId(xlogreader);
1003 : 191545 : block_id++)
1004 : : {
1005 : : RelFileLocator rlocator;
1006 : : ForkNumber forknum;
1007 : : BlockNumber blocknum;
1008 : :
1009 [ + + ]: 191545 : if (!XLogRecGetBlockTagExtended(xlogreader, block_id, &rlocator,
1010 : : &forknum, &blocknum, NULL))
1011 : 140 : continue;
1012 : :
1013 : : /*
1014 : : * As we do elsewhere, ignore the FSM fork, because it's not fully
1015 : : * WAL-logged.
1016 : : */
1017 [ + + ]: 191405 : if (forknum != FSM_FORKNUM)
1018 : 191036 : BlockRefTableMarkBlockModified(brtab, &rlocator, forknum,
1019 : : blocknum);
1020 : : }
1021 : :
1022 : : /* Update our notion of where this summary file ends. */
1023 : 193972 : summary_end_lsn = xlogreader->EndRecPtr;
1024 : :
1025 : : /* Also update shared memory. */
1026 : 193972 : LWLockAcquire(WALSummarizerLock, LW_EXCLUSIVE);
1027 [ - + ]: 193972 : Assert(summary_end_lsn >= WalSummarizerCtl->pending_lsn);
1028 [ - + ]: 193972 : Assert(summary_end_lsn >= WalSummarizerCtl->summarized_lsn);
1029 : 193972 : WalSummarizerCtl->pending_lsn = summary_end_lsn;
1030 : 193972 : LWLockRelease(WALSummarizerLock);
1031 : :
1032 : : /*
1033 : : * If we have a switch LSN and have reached it, stop before reading
1034 : : * the next record.
1035 : : */
1036 [ + + ]: 193972 : if (!XLogRecPtrIsInvalid(switch_lsn) &&
1037 [ + + ]: 6 : xlogreader->EndRecPtr >= switch_lsn)
1038 : 1 : break;
1039 : : }
1040 : :
1041 : : /* Destroy xlogreader. */
1042 : 69 : pfree(xlogreader->private_data);
1043 : 69 : XLogReaderFree(xlogreader);
1044 : :
1045 : : /*
1046 : : * If a timeline switch occurs, we may fail to make any progress at all
1047 : : * before exiting the loop above. If that happens, we don't write a WAL
1048 : : * summary file at all.
1049 : : */
1050 [ + - ]: 69 : if (summary_end_lsn > summary_start_lsn)
1051 : : {
1052 : : /* Generate temporary and final path name. */
1053 : 69 : snprintf(temp_path, MAXPGPATH,
1054 : : XLOGDIR "/summaries/temp.summary");
1055 : 69 : snprintf(final_path, MAXPGPATH,
1056 : : XLOGDIR "/summaries/%08X%08X%08X%08X%08X.summary",
1057 : : tli,
1058 : 69 : LSN_FORMAT_ARGS(summary_start_lsn),
1059 : 69 : LSN_FORMAT_ARGS(summary_end_lsn));
1060 : :
1061 : : /* Open the temporary file for writing. */
1062 : 69 : io.filepos = 0;
1063 : 69 : io.file = PathNameOpenFile(temp_path, O_WRONLY | O_CREAT | O_TRUNC);
1064 [ - + ]: 69 : if (io.file < 0)
116 rhaas@postgresql.org 1065 [ # # ]:UNC 0 : ereport(ERROR,
1066 : : (errcode_for_file_access(),
1067 : : errmsg("could not create file \"%s\": %m", temp_path)));
1068 : :
1069 : : /* Write the data. */
116 rhaas@postgresql.org 1070 :GNC 69 : WriteBlockRefTable(brtab, WriteWalSummary, &io);
1071 : :
1072 : : /* Close temporary file and shut down xlogreader. */
1073 : 69 : FileClose(io.file);
1074 : :
1075 : : /* Tell the user what we did. */
1076 [ - + ]: 69 : ereport(DEBUG1,
1077 : : errmsg("summarized WAL on TLI %u from %X/%X to %X/%X",
1078 : : tli,
1079 : : LSN_FORMAT_ARGS(summary_start_lsn),
1080 : : LSN_FORMAT_ARGS(summary_end_lsn)));
1081 : :
1082 : : /* Durably rename the new summary into place. */
1083 : 69 : durable_rename(temp_path, final_path, ERROR);
1084 : : }
1085 : :
1086 : 69 : return summary_end_lsn;
1087 : : }
1088 : :
1089 : : /*
1090 : : * Special handling for WAL records with RM_DBASE_ID.
1091 : : */
1092 : : static void
41 1093 : 22 : SummarizeDbaseRecord(XLogReaderState *xlogreader, BlockRefTable *brtab)
1094 : : {
1095 : 22 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1096 : :
1097 : : /*
1098 : : * We use relfilenode zero for a given database OID and tablespace OID to
1099 : : * indicate that all relations with that pair of IDs have been recreated
1100 : : * if they exist at all. Effectively, we're setting a limit block of 0 for
1101 : : * all such relfilenodes.
1102 : : *
1103 : : * Technically, this special handling is only needed in the case of
1104 : : * XLOG_DBASE_CREATE_FILE_COPY, because that can create a whole bunch of
1105 : : * relation files in a directory without logging anything specific to each
1106 : : * one. If we didn't mark the whole DB OID/TS OID combination in some way,
1107 : : * then a tablespace that was dropped after the reference backup and
1108 : : * recreated using the FILE_COPY method prior to the incremental backup
1109 : : * would look just like one that was never touched at all, which would be
1110 : : * catastrophic.
1111 : : *
1112 : : * But it seems best to adopt this treatment for all records that drop or
1113 : : * create a DB OID/TS OID combination. That's similar to how we treat the
1114 : : * limit block for individual relations, and it's an extra layer of safety
1115 : : * here. We can never lose data by marking more stuff as needing to be
1116 : : * backed up in full.
1117 : : */
1118 [ + + ]: 22 : if (info == XLOG_DBASE_CREATE_FILE_COPY)
1119 : : {
1120 : : xl_dbase_create_file_copy_rec *xlrec;
1121 : : RelFileLocator rlocator;
1122 : :
1123 : 16 : xlrec =
1124 : 16 : (xl_dbase_create_file_copy_rec *) XLogRecGetData(xlogreader);
1125 : 16 : rlocator.spcOid = xlrec->tablespace_id;
1126 : 16 : rlocator.dbOid = xlrec->db_id;
1127 : 16 : rlocator.relNumber = 0;
1128 : 16 : BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1129 : : }
1130 [ + + ]: 6 : else if (info == XLOG_DBASE_CREATE_WAL_LOG)
1131 : : {
1132 : : xl_dbase_create_wal_log_rec *xlrec;
1133 : : RelFileLocator rlocator;
1134 : :
1135 : 3 : xlrec = (xl_dbase_create_wal_log_rec *) XLogRecGetData(xlogreader);
1136 : 3 : rlocator.spcOid = xlrec->tablespace_id;
1137 : 3 : rlocator.dbOid = xlrec->db_id;
1138 : 3 : rlocator.relNumber = 0;
1139 : 3 : BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1140 : : }
1141 [ + - ]: 3 : else if (info == XLOG_DBASE_DROP)
1142 : : {
1143 : : xl_dbase_drop_rec *xlrec;
1144 : : RelFileLocator rlocator;
1145 : : int i;
1146 : :
1147 : 3 : xlrec = (xl_dbase_drop_rec *) XLogRecGetData(xlogreader);
1148 : 3 : rlocator.dbOid = xlrec->db_id;
1149 : 3 : rlocator.relNumber = 0;
1150 [ + + ]: 6 : for (i = 0; i < xlrec->ntablespaces; ++i)
1151 : : {
1152 : 3 : rlocator.spcOid = xlrec->tablespace_ids[i];
1153 : 3 : BlockRefTableSetLimitBlock(brtab, &rlocator, MAIN_FORKNUM, 0);
1154 : : }
1155 : : }
1156 : 22 : }
1157 : :
1158 : : /*
1159 : : * Special handling for WAL records with RM_SMGR_ID.
1160 : : */
1161 : : static void
116 1162 : 1015 : SummarizeSmgrRecord(XLogReaderState *xlogreader, BlockRefTable *brtab)
1163 : : {
1164 : 1015 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1165 : :
1166 [ + - ]: 1015 : if (info == XLOG_SMGR_CREATE)
1167 : : {
1168 : : xl_smgr_create *xlrec;
1169 : :
1170 : : /*
1171 : : * If a new relation fork is created on disk, there is no point
1172 : : * tracking anything about which blocks have been modified, because
1173 : : * the whole thing will be new. Hence, set the limit block for this
1174 : : * fork to 0.
1175 : : *
1176 : : * Ignore the FSM fork, which is not fully WAL-logged.
1177 : : */
1178 : 1015 : xlrec = (xl_smgr_create *) XLogRecGetData(xlogreader);
1179 : :
1180 [ + + ]: 1015 : if (xlrec->forkNum != FSM_FORKNUM)
1181 : 904 : BlockRefTableSetLimitBlock(brtab, &xlrec->rlocator,
1182 : : xlrec->forkNum, 0);
1183 : : }
116 rhaas@postgresql.org 1184 [ # # ]:UNC 0 : else if (info == XLOG_SMGR_TRUNCATE)
1185 : : {
1186 : : xl_smgr_truncate *xlrec;
1187 : :
1188 : 0 : xlrec = (xl_smgr_truncate *) XLogRecGetData(xlogreader);
1189 : :
1190 : : /*
1191 : : * If a relation fork is truncated on disk, there is no point in
1192 : : * tracking anything about block modifications beyond the truncation
1193 : : * point.
1194 : : *
1195 : : * We ignore SMGR_TRUNCATE_FSM here because the FSM isn't fully
1196 : : * WAL-logged and thus we can't track modified blocks for it anyway.
1197 : : */
1198 [ # # ]: 0 : if ((xlrec->flags & SMGR_TRUNCATE_HEAP) != 0)
1199 : 0 : BlockRefTableSetLimitBlock(brtab, &xlrec->rlocator,
1200 : : MAIN_FORKNUM, xlrec->blkno);
1201 [ # # ]: 0 : if ((xlrec->flags & SMGR_TRUNCATE_VM) != 0)
1202 : 0 : BlockRefTableSetLimitBlock(brtab, &xlrec->rlocator,
1203 : : VISIBILITYMAP_FORKNUM, xlrec->blkno);
1204 : : }
116 rhaas@postgresql.org 1205 :GNC 1015 : }
1206 : :
1207 : : /*
1208 : : * Special handling for WAL records with RM_XACT_ID.
1209 : : */
1210 : : static void
1211 : 5192 : SummarizeXactRecord(XLogReaderState *xlogreader, BlockRefTable *brtab)
1212 : : {
1213 : 5192 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1214 : 5192 : uint8 xact_info = info & XLOG_XACT_OPMASK;
1215 : :
1216 [ - + - - ]: 5192 : if (xact_info == XLOG_XACT_COMMIT ||
1217 : : xact_info == XLOG_XACT_COMMIT_PREPARED)
1218 : 5192 : {
1219 : 5192 : xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(xlogreader);
1220 : : xl_xact_parsed_commit parsed;
1221 : : int i;
1222 : :
1223 : : /*
1224 : : * Don't track modified blocks for any relations that were removed on
1225 : : * commit.
1226 : : */
1227 : 5192 : ParseCommitRecord(XLogRecGetInfo(xlogreader), xlrec, &parsed);
1228 [ + + ]: 5210 : for (i = 0; i < parsed.nrels; ++i)
1229 : : {
1230 : : ForkNumber forknum;
1231 : :
1232 [ + + ]: 90 : for (forknum = 0; forknum <= MAX_FORKNUM; ++forknum)
1233 [ + + ]: 72 : if (forknum != FSM_FORKNUM)
1234 : 54 : BlockRefTableSetLimitBlock(brtab, &parsed.xlocators[i],
1235 : : forknum, 0);
1236 : : }
1237 : : }
116 rhaas@postgresql.org 1238 [ # # # # ]:UNC 0 : else if (xact_info == XLOG_XACT_ABORT ||
1239 : : xact_info == XLOG_XACT_ABORT_PREPARED)
1240 : : {
1241 : 0 : xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(xlogreader);
1242 : : xl_xact_parsed_abort parsed;
1243 : : int i;
1244 : :
1245 : : /*
1246 : : * Don't track modified blocks for any relations that were removed on
1247 : : * abort.
1248 : : */
1249 : 0 : ParseAbortRecord(XLogRecGetInfo(xlogreader), xlrec, &parsed);
1250 [ # # ]: 0 : for (i = 0; i < parsed.nrels; ++i)
1251 : : {
1252 : : ForkNumber forknum;
1253 : :
1254 [ # # ]: 0 : for (forknum = 0; forknum <= MAX_FORKNUM; ++forknum)
1255 [ # # ]: 0 : if (forknum != FSM_FORKNUM)
1256 : 0 : BlockRefTableSetLimitBlock(brtab, &parsed.xlocators[i],
1257 : : forknum, 0);
1258 : : }
1259 : : }
116 rhaas@postgresql.org 1260 :GNC 5192 : }
1261 : :
1262 : : /*
1263 : : * Special handling for WAL records with RM_XLOG_ID.
1264 : : */
1265 : : static bool
1266 : 4373 : SummarizeXlogRecord(XLogReaderState *xlogreader)
1267 : : {
1268 : 4373 : uint8 info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
1269 : :
1270 [ + + + + ]: 4373 : if (info == XLOG_CHECKPOINT_REDO || info == XLOG_CHECKPOINT_SHUTDOWN)
1271 : : {
1272 : : /*
1273 : : * This is an LSN at which redo might begin, so we'd like
1274 : : * summarization to stop just before this WAL record.
1275 : : */
1276 : 148 : return true;
1277 : : }
1278 : :
1279 : 4225 : return false;
1280 : : }
1281 : :
1282 : : /*
1283 : : * Similar to read_local_xlog_page, but limited to read from one particular
1284 : : * timeline. If the end of WAL is reached, it will wait for more if reading
1285 : : * from the current timeline, or give up if reading from a historic timeline.
1286 : : * In the latter case, it will also set private_data->end_of_wal = true.
1287 : : *
1288 : : * Caller must set private_data->tli to the TLI of interest,
1289 : : * private_data->read_upto to the lowest LSN that is not known to be safe
1290 : : * to read on that timeline, and private_data->historic to true if and only
1291 : : * if the timeline is not the current timeline. This function will update
1292 : : * private_data->read_upto and private_data->historic if more WAL appears
1293 : : * on the current timeline or if the current timeline becomes historic.
1294 : : */
1295 : : static int
1296 : 6767 : summarizer_read_local_xlog_page(XLogReaderState *state,
1297 : : XLogRecPtr targetPagePtr, int reqLen,
1298 : : XLogRecPtr targetRecPtr, char *cur_page)
1299 : : {
1300 : : int count;
1301 : : WALReadError errinfo;
1302 : : SummarizerReadLocalXLogPrivate *private_data;
1303 : :
1304 : 6767 : HandleWalSummarizerInterrupts();
1305 : :
1306 : 6767 : private_data = (SummarizerReadLocalXLogPrivate *)
1307 : : state->private_data;
1308 : :
1309 : : while (1)
1310 : : {
1311 [ + + ]: 6882 : if (targetPagePtr + XLOG_BLCKSZ <= private_data->read_upto)
1312 : : {
1313 : : /*
1314 : : * more than one block available; read only that block, have
1315 : : * caller come back if they need more.
1316 : : */
1317 : 6665 : count = XLOG_BLCKSZ;
1318 : 6665 : break;
1319 : : }
1320 [ + + ]: 217 : else if (targetPagePtr + reqLen > private_data->read_upto)
1321 : : {
1322 : : /* We don't seem to have enough data. */
1323 [ + + ]: 128 : if (private_data->historic)
1324 : : {
1325 : : /*
1326 : : * This is a historic timeline, so there will never be any
1327 : : * more data than we have currently.
1328 : : */
1329 : 1 : private_data->end_of_wal = true;
1330 : 1 : return -1;
1331 : : }
1332 : : else
1333 : : {
1334 : : XLogRecPtr latest_lsn;
1335 : : TimeLineID latest_tli;
1336 : :
1337 : : /*
1338 : : * This is - or at least was up until very recently - the
1339 : : * current timeline, so more data might show up. Delay here
1340 : : * so we don't tight-loop.
1341 : : */
1342 : 127 : HandleWalSummarizerInterrupts();
1343 : 127 : summarizer_wait_for_wal();
1344 : :
1345 : : /* Recheck end-of-WAL. */
1346 : 115 : latest_lsn = GetLatestLSN(&latest_tli);
1347 [ + + ]: 115 : if (private_data->tli == latest_tli)
1348 : : {
1349 : : /* Still the current timeline, update max LSN. */
1350 [ - + ]: 113 : Assert(latest_lsn >= private_data->read_upto);
1351 : 113 : private_data->read_upto = latest_lsn;
1352 : : }
1353 : : else
1354 : : {
1355 : 2 : List *tles = readTimeLineHistory(latest_tli);
1356 : : XLogRecPtr switchpoint;
1357 : :
1358 : : /*
1359 : : * The timeline we're scanning is no longer the latest
1360 : : * one. Figure out when it ended.
1361 : : */
1362 : 2 : private_data->historic = true;
1363 : 2 : switchpoint = tliSwitchPoint(private_data->tli, tles,
1364 : : NULL);
1365 : :
1366 : : /*
1367 : : * Allow reads up to exactly the switch point.
1368 : : *
1369 : : * It's possible that this will cause read_upto to move
1370 : : * backwards, because walreceiver might have read a
1371 : : * partial record and flushed it to disk, and we'd view
1372 : : * that data as safe to read. However, the
1373 : : * XLOG_END_OF_RECOVERY record will be written at the end
1374 : : * of the last complete WAL record, not at the end of the
1375 : : * WAL that we've flushed to disk.
1376 : : *
1377 : : * So switchpoint < private->read_upto is possible here,
1378 : : * but switchpoint < state->EndRecPtr should not be.
1379 : : */
1380 [ - + ]: 2 : Assert(switchpoint >= state->EndRecPtr);
1381 : 2 : private_data->read_upto = switchpoint;
1382 : :
1383 : : /* Debugging output. */
1384 [ - + ]: 2 : ereport(DEBUG1,
1385 : : errmsg("timeline %u became historic, can read up to %X/%X",
1386 : : private_data->tli, LSN_FORMAT_ARGS(private_data->read_upto)));
1387 : : }
1388 : :
1389 : : /* Go around and try again. */
1390 : : }
1391 : : }
1392 : : else
1393 : : {
1394 : : /* enough bytes available to satisfy the request */
1395 : 89 : count = private_data->read_upto - targetPagePtr;
1396 : 89 : break;
1397 : : }
1398 : : }
1399 : :
58 jdavis@postgresql.or 1400 [ - + ]: 6754 : if (!WALRead(state, cur_page, targetPagePtr, count,
1401 : : private_data->tli, &errinfo))
116 rhaas@postgresql.org 1402 :UNC 0 : WALReadRaiseError(&errinfo);
1403 : :
1404 : : /* Track that we read a page, for sleep time calculation. */
116 rhaas@postgresql.org 1405 :GNC 6754 : ++pages_read_since_last_sleep;
1406 : :
1407 : : /* number of valid bytes in the buffer */
1408 : 6754 : return count;
1409 : : }
1410 : :
1411 : : /*
1412 : : * Sleep for long enough that we believe it's likely that more WAL will
1413 : : * be available afterwards.
1414 : : */
1415 : : static void
1416 : 127 : summarizer_wait_for_wal(void)
1417 : : {
1418 [ + + ]: 127 : if (pages_read_since_last_sleep == 0)
1419 : : {
1420 : : /*
1421 : : * No pages were read since the last sleep, so double the sleep time,
1422 : : * but not beyond the maximum allowable value.
1423 : : */
1424 : 65 : sleep_quanta = Min(sleep_quanta * 2, MAX_SLEEP_QUANTA);
1425 : : }
1426 [ + + ]: 62 : else if (pages_read_since_last_sleep > 1)
1427 : : {
1428 : : /*
1429 : : * Multiple pages were read since the last sleep, so reduce the sleep
1430 : : * time.
1431 : : *
1432 : : * A large burst of activity should be able to quickly reduce the
1433 : : * sleep time to the minimum, but we don't want a handful of extra WAL
1434 : : * records to provoke a strong reaction. We choose to reduce the sleep
1435 : : * time by 1 quantum for each page read beyond the first, which is a
1436 : : * fairly arbitrary way of trying to be reactive without overreacting.
1437 : : */
1438 [ + + ]: 49 : if (pages_read_since_last_sleep > sleep_quanta - 1)
1439 : 41 : sleep_quanta = 1;
1440 : : else
1441 : 8 : sleep_quanta -= pages_read_since_last_sleep;
1442 : : }
1443 : :
1444 : : /* OK, now sleep. */
1445 : 127 : (void) WaitLatch(MyLatch,
1446 : : WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1447 : : sleep_quanta * MS_PER_SLEEP_QUANTUM,
1448 : : WAIT_EVENT_WAL_SUMMARIZER_WAL);
1449 : 115 : ResetLatch(MyLatch);
1450 : :
1451 : : /* Reset count of pages read. */
1452 : 115 : pages_read_since_last_sleep = 0;
1453 : 115 : }
1454 : :
1455 : : /*
1456 : : * Remove WAL summaries whose mtimes are older than wal_summary_keep_time.
1457 : : */
1458 : : static void
1459 : 83 : MaybeRemoveOldWalSummaries(void)
1460 : : {
1461 : 83 : XLogRecPtr redo_pointer = GetRedoRecPtr();
1462 : : List *wslist;
1463 : : time_t cutoff_time;
1464 : :
1465 : : /* If WAL summary removal is disabled, don't do anything. */
1466 [ - + ]: 83 : if (wal_summary_keep_time == 0)
116 rhaas@postgresql.org 1467 :UNC 0 : return;
1468 : :
1469 : : /*
1470 : : * If the redo pointer has not advanced, don't do anything.
1471 : : *
1472 : : * This has the effect that we only try to remove old WAL summary files
1473 : : * once per checkpoint cycle.
1474 : : */
116 rhaas@postgresql.org 1475 [ + + ]:GNC 83 : if (redo_pointer == redo_pointer_at_last_summary_removal)
1476 : 49 : return;
1477 : 34 : redo_pointer_at_last_summary_removal = redo_pointer;
1478 : :
1479 : : /*
1480 : : * Files should only be removed if the last modification time precedes the
1481 : : * cutoff time we compute here.
1482 : : */
25 nathan@postgresql.or 1483 : 34 : cutoff_time = time(NULL) - wal_summary_keep_time * SECS_PER_MINUTE;
1484 : :
1485 : : /* Get all the summaries that currently exist. */
116 rhaas@postgresql.org 1486 : 34 : wslist = GetWalSummaries(0, InvalidXLogRecPtr, InvalidXLogRecPtr);
1487 : :
1488 : : /* Loop until all summaries have been considered for removal. */
1489 [ + + ]: 56 : while (wslist != NIL)
1490 : : {
1491 : : ListCell *lc;
1492 : : XLogSegNo oldest_segno;
1493 : 22 : XLogRecPtr oldest_lsn = InvalidXLogRecPtr;
1494 : : TimeLineID selected_tli;
1495 : :
1496 : 22 : HandleWalSummarizerInterrupts();
1497 : :
1498 : : /*
1499 : : * Pick a timeline for which some summary files still exist on disk,
1500 : : * and find the oldest LSN that still exists on disk for that
1501 : : * timeline.
1502 : : */
1503 : 22 : selected_tli = ((WalSummaryFile *) linitial(wslist))->tli;
1504 : 22 : oldest_segno = XLogGetOldestSegno(selected_tli);
1505 [ + - ]: 22 : if (oldest_segno != 0)
1506 : 22 : XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size,
1507 : : oldest_lsn);
1508 : :
1509 : :
1510 : : /* Consider each WAL file on the selected timeline in turn. */
1511 [ + + + - : 190 : foreach(lc, wslist)
+ + ]
1512 : : {
1513 : 168 : WalSummaryFile *ws = lfirst(lc);
1514 : :
1515 : 168 : HandleWalSummarizerInterrupts();
1516 : :
1517 : : /* If it's not on this timeline, it's not time to consider it. */
1518 [ - + ]: 168 : if (selected_tli != ws->tli)
116 rhaas@postgresql.org 1519 :UNC 0 : continue;
1520 : :
1521 : : /*
1522 : : * If the WAL doesn't exist any more, we can remove it if the file
1523 : : * modification time is old enough.
1524 : : */
116 rhaas@postgresql.org 1525 [ + - + + ]:GNC 168 : if (XLogRecPtrIsInvalid(oldest_lsn) || ws->end_lsn <= oldest_lsn)
1526 : 67 : RemoveWalSummaryIfOlderThan(ws, cutoff_time);
1527 : :
1528 : : /*
1529 : : * Whether we removed the file or not, we need not consider it
1530 : : * again.
1531 : : */
1532 : 168 : wslist = foreach_delete_current(wslist, lc);
1533 : 168 : pfree(ws);
1534 : : }
1535 : : }
1536 : : }
|