Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * parsexlog.c
4 : * Functions for reading Write-Ahead-Log
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *-------------------------------------------------------------------------
10 : */
11 :
12 : #include "postgres_fe.h"
13 :
14 : #include <unistd.h>
15 :
16 : #include "access/rmgr.h"
17 : #include "access/xact.h"
18 : #include "access/xlog_internal.h"
19 : #include "access/xlogreader.h"
20 : #include "catalog/pg_control.h"
21 : #include "catalog/storage_xlog.h"
22 : #include "commands/dbcommands_xlog.h"
23 : #include "fe_utils/archive.h"
24 : #include "filemap.h"
25 : #include "pg_rewind.h"
26 :
27 : /*
28 : * RmgrNames is an array of the built-in resource manager names, to make error
29 : * messages a bit nicer.
30 : */
31 : #define PG_RMGR(symname,name,redo,desc,identify,startup,cleanup,mask,decode) \
32 : name,
33 :
34 : static const char *RmgrNames[RM_MAX_ID + 1] = {
35 : #include "access/rmgrlist.h"
36 : };
37 :
38 : #define RmgrName(rmid) (((rmid) <= RM_MAX_BUILTIN_ID) ? \
39 : RmgrNames[rmid] : "custom")
40 :
41 : static void extractPageInfo(XLogReaderState *record);
42 :
43 : static int xlogreadfd = -1;
44 : static XLogSegNo xlogreadsegno = 0;
45 : static char xlogfpath[MAXPGPATH];
46 :
47 : typedef struct XLogPageReadPrivate
48 : {
49 : const char *restoreCommand;
50 : int tliIndex;
51 : } XLogPageReadPrivate;
52 :
53 : static int SimpleXLogPageRead(XLogReaderState *xlogreader,
54 : XLogRecPtr targetPagePtr,
55 : int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
56 :
57 : /*
58 : * Read WAL from the datadir/pg_wal, starting from 'startpoint' on timeline
59 : * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
60 : * the data blocks touched by the WAL records, and return them in a page map.
61 : *
62 : * 'endpoint' is the end of the last record to read. The record starting at
63 : * 'endpoint' is the first one that is not read.
64 : */
65 : void
2686 teodor 66 CBC 13 : extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
67 : XLogRecPtr endpoint, const char *restoreCommand)
68 : {
69 : XLogRecord *record;
70 : XLogReaderState *xlogreader;
71 : char *errormsg;
72 : XLogPageReadPrivate private;
73 :
699 tmunro 74 13 : private.tliIndex = tliIndex;
75 13 : private.restoreCommand = restoreCommand;
76 13 : xlogreader = XLogReaderAllocate(WalSegSz, datadir,
77 13 : XL_ROUTINE(.page_read = &SimpleXLogPageRead),
78 : &private);
2928 fujii 79 13 : if (xlogreader == NULL)
503 alvherre 80 UBC 0 : pg_fatal("out of memory while allocating a WAL reading processor");
81 :
1169 heikki.linnakangas 82 CBC 13 : XLogBeginRead(xlogreader, startpoint);
83 : do
84 : {
699 tmunro 85 86700 : record = XLogReadRecord(xlogreader, &errormsg);
86 :
2939 heikki.linnakangas 87 86700 : if (record == NULL)
88 : {
1169 heikki.linnakangas 89 UBC 0 : XLogRecPtr errptr = xlogreader->EndRecPtr;
90 :
2939 91 0 : if (errormsg)
1469 peter 92 0 : pg_fatal("could not read WAL record at %X/%X: %s",
93 : LSN_FORMAT_ARGS(errptr),
94 : errormsg);
95 : else
96 0 : pg_fatal("could not read WAL record at %X/%X",
97 : LSN_FORMAT_ARGS(errptr));
98 : }
99 :
2939 heikki.linnakangas 100 CBC 86700 : extractPageInfo(xlogreader);
857 101 86700 : } while (xlogreader->EndRecPtr < endpoint);
102 :
103 : /*
104 : * If 'endpoint' didn't point exactly at a record boundary, the caller
105 : * messed up.
106 : */
382 alvherre 107 13 : if (xlogreader->EndRecPtr != endpoint)
382 alvherre 108 UBC 0 : pg_fatal("end pointer %X/%X is not a valid end point; expected %X/%X",
109 : LSN_FORMAT_ARGS(endpoint), LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
110 :
2939 heikki.linnakangas 111 CBC 13 : XLogReaderFree(xlogreader);
112 13 : if (xlogreadfd != -1)
113 : {
114 13 : close(xlogreadfd);
115 13 : xlogreadfd = -1;
116 : }
117 13 : }
118 :
119 : /*
120 : * Reads one WAL record. Returns the end position of the record, without
121 : * doing anything with the record itself.
122 : */
123 : XLogRecPtr
1082 michael 124 13 : readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex,
125 : const char *restoreCommand)
126 : {
127 : XLogRecord *record;
128 : XLogReaderState *xlogreader;
129 : char *errormsg;
130 : XLogPageReadPrivate private;
131 : XLogRecPtr endptr;
132 :
699 tmunro 133 13 : private.tliIndex = tliIndex;
134 13 : private.restoreCommand = restoreCommand;
135 13 : xlogreader = XLogReaderAllocate(WalSegSz, datadir,
136 13 : XL_ROUTINE(.page_read = &SimpleXLogPageRead),
137 : &private);
2928 fujii 138 13 : if (xlogreader == NULL)
503 alvherre 139 UBC 0 : pg_fatal("out of memory while allocating a WAL reading processor");
140 :
1169 heikki.linnakangas 141 CBC 13 : XLogBeginRead(xlogreader, ptr);
699 tmunro 142 13 : record = XLogReadRecord(xlogreader, &errormsg);
2939 heikki.linnakangas 143 13 : if (record == NULL)
144 : {
2939 heikki.linnakangas 145 UBC 0 : if (errormsg)
1469 peter 146 0 : pg_fatal("could not read WAL record at %X/%X: %s",
147 : LSN_FORMAT_ARGS(ptr), errormsg);
148 : else
149 0 : pg_fatal("could not read WAL record at %X/%X",
150 : LSN_FORMAT_ARGS(ptr));
151 : }
2939 heikki.linnakangas 152 CBC 13 : endptr = xlogreader->EndRecPtr;
153 :
154 13 : XLogReaderFree(xlogreader);
155 13 : if (xlogreadfd != -1)
156 : {
157 13 : close(xlogreadfd);
158 13 : xlogreadfd = -1;
159 : }
160 :
161 13 : return endptr;
162 : }
163 :
164 : /*
165 : * Find the previous checkpoint preceding given WAL location.
166 : */
167 : void
2686 teodor 168 13 : findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
169 : XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
170 : XLogRecPtr *lastchkptredo, const char *restoreCommand)
171 : {
172 : /* Walk backwards, starting from the given record */
173 : XLogRecord *record;
174 : XLogRecPtr searchptr;
175 : XLogReaderState *xlogreader;
176 : char *errormsg;
177 : XLogPageReadPrivate private;
178 :
179 : /*
180 : * The given fork pointer points to the end of the last common record,
181 : * which is not necessarily the beginning of the next record, if the
182 : * previous record happens to end at a page boundary. Skip over the page
183 : * header in that case to find the next record.
184 : */
2939 heikki.linnakangas 185 13 : if (forkptr % XLOG_BLCKSZ == 0)
186 : {
2028 andres 187 2 : if (XLogSegmentOffset(forkptr, WalSegSz) == 0)
188 2 : forkptr += SizeOfXLogLongPHD;
189 : else
2028 andres 190 UBC 0 : forkptr += SizeOfXLogShortPHD;
191 : }
192 :
699 tmunro 193 CBC 13 : private.tliIndex = tliIndex;
194 13 : private.restoreCommand = restoreCommand;
195 13 : xlogreader = XLogReaderAllocate(WalSegSz, datadir,
196 13 : XL_ROUTINE(.page_read = &SimpleXLogPageRead),
197 : &private);
2928 fujii 198 13 : if (xlogreader == NULL)
503 alvherre 199 UBC 0 : pg_fatal("out of memory while allocating a WAL reading processor");
200 :
2939 heikki.linnakangas 201 CBC 13 : searchptr = forkptr;
202 : for (;;)
203 2577 : {
204 : uint8 info;
205 :
1169 206 2590 : XLogBeginRead(xlogreader, searchptr);
699 tmunro 207 2590 : record = XLogReadRecord(xlogreader, &errormsg);
208 :
2939 heikki.linnakangas 209 2590 : if (record == NULL)
210 : {
2939 heikki.linnakangas 211 UBC 0 : if (errormsg)
1469 peter 212 0 : pg_fatal("could not find previous WAL record at %X/%X: %s",
213 : LSN_FORMAT_ARGS(searchptr),
214 : errormsg);
215 : else
216 0 : pg_fatal("could not find previous WAL record at %X/%X",
217 : LSN_FORMAT_ARGS(searchptr));
218 : }
219 :
220 : /*
221 : * Check if it is a checkpoint record. This checkpoint record needs to
222 : * be the latest checkpoint before WAL forked and not the checkpoint
223 : * where the primary has been stopped to be rewound.
224 : */
2939 heikki.linnakangas 225 CBC 2590 : info = XLogRecGetInfo(xlogreader) & ~XLR_INFO_MASK;
226 2590 : if (searchptr < forkptr &&
227 2577 : XLogRecGetRmid(xlogreader) == RM_XLOG_ID &&
228 1897 : (info == XLOG_CHECKPOINT_SHUTDOWN ||
229 : info == XLOG_CHECKPOINT_ONLINE))
230 : {
231 : CheckPoint checkPoint;
232 :
233 13 : memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
234 13 : *lastchkptrec = searchptr;
235 13 : *lastchkpttli = checkPoint.ThisTimeLineID;
236 13 : *lastchkptredo = checkPoint.redo;
237 13 : break;
238 : }
239 :
240 : /* Walk backwards to previous record. */
241 2577 : searchptr = record->xl_prev;
242 : }
243 :
244 13 : XLogReaderFree(xlogreader);
245 13 : if (xlogreadfd != -1)
246 : {
247 13 : close(xlogreadfd);
248 13 : xlogreadfd = -1;
249 : }
250 13 : }
251 :
252 : /* XLogReader callback function, to read a WAL page */
253 : static int
699 tmunro 254 5819 : SimpleXLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
255 : int reqLen, XLogRecPtr targetRecPtr, char *readBuf)
256 : {
257 5819 : XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
258 : uint32 targetPageOff;
259 : XLogRecPtr targetSegEnd;
260 : XLogSegNo targetSegNo;
261 : int r;
262 :
2028 andres 263 5819 : XLByteToSeg(targetPagePtr, targetSegNo, WalSegSz);
1735 alvherre 264 5819 : XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, WalSegSz, targetSegEnd);
2028 andres 265 5819 : targetPageOff = XLogSegmentOffset(targetPagePtr, WalSegSz);
266 :
267 : /*
268 : * See if we need to switch to a new segment because the requested record
269 : * is not in the currently open one.
270 : */
271 5819 : if (xlogreadfd >= 0 &&
272 5780 : !XLByteInSeg(targetPagePtr, xlogreadsegno, WalSegSz))
273 : {
2939 heikki.linnakangas 274 8 : close(xlogreadfd);
275 8 : xlogreadfd = -1;
276 : }
277 :
2028 andres 278 5819 : XLByteToSeg(targetPagePtr, xlogreadsegno, WalSegSz);
279 :
2939 heikki.linnakangas 280 5819 : if (xlogreadfd < 0)
281 : {
282 : char xlogfname[MAXFNAMELEN];
283 :
284 : /*
285 : * Since incomplete segments are copied into next timelines, switch to
286 : * the timeline holding the required segment. Assuming this scan can
287 : * be done both forward and backward, consider also switching timeline
288 : * accordingly.
289 : */
699 tmunro 290 49 : while (private->tliIndex < targetNentries - 1 &&
291 2 : targetHistory[private->tliIndex].end < targetSegEnd)
292 2 : private->tliIndex++;
293 47 : while (private->tliIndex > 0 &&
294 6 : targetHistory[private->tliIndex].begin >= targetSegEnd)
699 tmunro 295 UBC 0 : private->tliIndex--;
296 :
699 tmunro 297 CBC 47 : XLogFileName(xlogfname, targetHistory[private->tliIndex].tli,
298 : xlogreadsegno, WalSegSz);
299 :
1293 alvherre 300 47 : snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s",
301 47 : xlogreader->segcxt.ws_dir, xlogfname);
302 :
2939 heikki.linnakangas 303 47 : xlogreadfd = open(xlogfpath, O_RDONLY | PG_BINARY, 0);
304 :
305 47 : if (xlogreadfd < 0)
306 : {
307 : /*
308 : * If we have no restore_command to execute, then exit.
309 : */
699 tmunro 310 1 : if (private->restoreCommand == NULL)
311 : {
1103 michael 312 UBC 0 : pg_log_error("could not open file \"%s\": %m", xlogfpath);
699 tmunro 313 0 : return -1;
314 : }
315 :
316 : /*
317 : * Since we have restore_command, then try to retrieve missing WAL
318 : * file from the archive.
319 : */
1103 michael 320 CBC 1 : xlogreadfd = RestoreArchivedFile(xlogreader->segcxt.ws_dir,
321 : xlogfname,
322 : WalSegSz,
323 : private->restoreCommand);
324 :
325 1 : if (xlogreadfd < 0)
699 tmunro 326 UBC 0 : return -1;
327 : else
1103 michael 328 CBC 1 : pg_log_debug("using file \"%s\" restored from archive",
329 : xlogfpath);
330 : }
331 : }
332 :
333 : /*
334 : * At this point, we have the right segment open.
335 : */
2939 heikki.linnakangas 336 5819 : Assert(xlogreadfd != -1);
337 :
338 : /* Read the requested page */
339 5819 : if (lseek(xlogreadfd, (off_t) targetPageOff, SEEK_SET) < 0)
340 : {
1469 peter 341 UBC 0 : pg_log_error("could not seek in file \"%s\": %m", xlogfpath);
699 tmunro 342 0 : return -1;
343 : }
344 :
345 :
1726 michael 346 CBC 5819 : r = read(xlogreadfd, readBuf, XLOG_BLCKSZ);
347 5819 : if (r != XLOG_BLCKSZ)
348 : {
1726 michael 349 UBC 0 : if (r < 0)
1469 peter 350 0 : pg_log_error("could not read file \"%s\": %m", xlogfpath);
351 : else
352 0 : pg_log_error("could not read file \"%s\": read %d of %zu",
353 : xlogfpath, r, (Size) XLOG_BLCKSZ);
354 :
699 tmunro 355 0 : return -1;
356 : }
357 :
2939 heikki.linnakangas 358 CBC 5819 : Assert(targetSegNo == xlogreadsegno);
359 :
699 tmunro 360 5819 : xlogreader->seg.ws_tli = targetHistory[private->tliIndex].tli;
361 5819 : return XLOG_BLCKSZ;
362 : }
363 :
364 : /*
365 : * Extract information on which blocks the current record modifies.
366 : */
367 : static void
2939 heikki.linnakangas 368 86700 : extractPageInfo(XLogReaderState *record)
369 : {
370 : int block_id;
371 86700 : RmgrId rmid = XLogRecGetRmid(record);
372 86700 : uint8 info = XLogRecGetInfo(record);
373 86700 : uint8 rminfo = info & ~XLR_INFO_MASK;
374 :
375 : /* Is this a special record type that I recognize? */
376 :
376 rhaas 377 86700 : if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_FILE_COPY)
378 : {
379 : /*
380 : * New databases can be safely ignored. It won't be present in the
381 : * source system, so it will be deleted. There's one corner-case,
382 : * though: if a new, different, database is also created in the source
383 : * system, we'll see that the files already exist and not copy them.
384 : * That's OK, though; WAL replay of creating the new database, from
385 : * the source systems's WAL, will re-copy the new database,
386 : * overwriting the database created in the target system.
387 : */
388 : }
389 86700 : else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_CREATE_WAL_LOG)
390 : {
391 : /*
392 : * New databases can be safely ignored. It won't be present in the
393 : * source system, so it will be deleted.
394 : */
395 : }
2939 heikki.linnakangas 396 86696 : else if (rmid == RM_DBASE_ID && rminfo == XLOG_DBASE_DROP)
397 : {
398 : /*
399 : * An existing database was dropped. We'll see that the files don't
400 : * exist in the target data dir, and copy them in toto from the source
401 : * system. No need to do anything special here.
402 : */
403 : }
404 86696 : else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_CREATE)
405 : {
406 : /*
407 : * We can safely ignore these. The file will be removed from the
408 : * target, if it doesn't exist in source system. If a file with same
409 : * name is created in source system, too, there will be WAL records
410 : * for all the blocks in it.
411 : */
412 : }
413 85508 : else if (rmid == RM_SMGR_ID && rminfo == XLOG_SMGR_TRUNCATE)
414 : {
415 : /*
416 : * We can safely ignore these. When we compare the sizes later on,
417 : * we'll notice that they differ, and copy the missing tail from
418 : * source system.
419 : */
420 : }
965 421 85504 : else if (rmid == RM_XACT_ID &&
422 37 : ((rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT ||
965 heikki.linnakangas 423 UBC 0 : (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT_PREPARED ||
424 0 : (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT ||
425 0 : (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_ABORT_PREPARED))
426 : {
427 : /*
428 : * These records can include "dropped rels". We can safely ignore
429 : * them, we will see that they are missing and copy them from the
430 : * source.
431 : */
432 : }
2939 heikki.linnakangas 433 CBC 85467 : else if (info & XLR_SPECIAL_REL_UPDATE)
434 : {
435 : /*
436 : * This record type modifies a relation file in some special way, but
437 : * we don't recognize the type. That's bad - we don't know how to
438 : * track that change.
439 : */
1469 peter 440 UBC 0 : pg_fatal("WAL record modifies a relation, but record type is not recognized: "
441 : "lsn: %X/%X, rmid: %d, rmgr: %s, info: %02X",
442 : LSN_FORMAT_ARGS(record->ReadRecPtr),
443 : rmid, RmgrName(rmid), info);
444 : }
445 :
387 tmunro 446 CBC 172430 : for (block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
447 : {
448 : RelFileLocator rlocator;
449 : ForkNumber forknum;
450 : BlockNumber blkno;
451 :
363 tgl 452 85730 : if (!XLogRecGetBlockTagExtended(record, block_id,
453 : &rlocator, &forknum, &blkno, NULL))
2939 heikki.linnakangas 454 948 : continue;
455 :
456 : /* We only care about the main fork; others are copied in toto */
457 85730 : if (forknum != MAIN_FORKNUM)
458 948 : continue;
459 :
277 rhaas 460 GNC 84782 : process_target_wal_block_change(forknum, rlocator, blkno);
461 : }
2939 heikki.linnakangas 462 CBC 86700 : }
|