Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * xlogreader.h
4 : : * Definitions for the generic XLog reading facility
5 : : *
6 : : * Portions Copyright (c) 2013-2024, PostgreSQL Global Development Group
7 : : *
8 : : * IDENTIFICATION
9 : : * src/include/access/xlogreader.h
10 : : *
11 : : * NOTES
12 : : * See the definition of the XLogReaderState struct for instructions on
13 : : * how to use the XLogReader infrastructure.
14 : : *
15 : : * The basic idea is to allocate an XLogReaderState via
16 : : * XLogReaderAllocate(), position the reader to the first record with
17 : : * XLogBeginRead() or XLogFindNextRecord(), and call XLogReadRecord()
18 : : * until it returns NULL.
19 : : *
20 : : * Callers supply a page_read callback if they want to call
21 : : * XLogReadRecord or XLogFindNextRecord; it can be passed in as NULL
22 : : * otherwise. The WALRead function can be used as a helper to write
23 : : * page_read callbacks, but it is not mandatory; callers that use it
24 : : * must supply a segment_open callback. The segment_close callback
25 : : * must always be supplied.
26 : : *
27 : : * After reading a record with XLogReadRecord(), it's decomposed into
28 : : * the per-block and main data parts, and the parts can be accessed
29 : : * with the XLogRec* macros and functions. You can also decode a
30 : : * record that's already constructed in memory, without reading from
31 : : * disk, by calling the DecodeXLogRecord() function.
32 : : *-------------------------------------------------------------------------
33 : : */
34 : : #ifndef XLOGREADER_H
35 : : #define XLOGREADER_H
36 : :
37 : : #ifndef FRONTEND
38 : : #include "access/transam.h"
39 : : #endif
40 : :
41 : : #include "access/xlogrecord.h"
42 : : #include "storage/buf.h"
43 : :
44 : : /* WALOpenSegment represents a WAL segment being read. */
45 : : typedef struct WALOpenSegment
46 : : {
47 : : int ws_file; /* segment file descriptor; the segment_close callback sets it negative */
48 : : XLogSegNo ws_segno; /* segment number */
49 : : TimeLineID ws_tli; /* timeline ID of the currently open file */
50 : : } WALOpenSegment;
51 : :
52 : : /* WALSegmentContext carries context information about WAL segments to read */
53 : : typedef struct WALSegmentContext
54 : : {
55 : : char ws_dir[MAXPGPATH]; /* NOTE(review): presumably the WAL directory path given to XLogReaderAllocate() -- confirm */
56 : : int ws_segsize; /* NOTE(review): presumably the WAL segment size in bytes -- confirm */
57 : : } WALSegmentContext;
58 : :
59 : : typedef struct XLogReaderState XLogReaderState;
60 : :
61 : : /* Function types for the XLogReaderRoutine callbacks; each callback's contract is documented on the matching XLogReaderRoutine member. */
62 : : typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
63 : : XLogRecPtr targetPagePtr,
64 : : int reqLen,
65 : : XLogRecPtr targetRecPtr,
66 : : char *readBuf);
67 : : typedef void (*WALSegmentOpenCB) (XLogReaderState *xlogreader,
68 : : XLogSegNo nextSegNo,
69 : : TimeLineID *tli_p);
70 : : typedef void (*WALSegmentCloseCB) (XLogReaderState *xlogreader);
71 : :
72 : : typedef struct XLogReaderRoutine
73 : : {
74 : : /*
75 : : * Data input callback
76 : : *
77 : : * This callback shall read at least reqLen valid bytes of the xlog page
78 : : * starting at targetPagePtr, and store them in readBuf. The callback
79 : : * shall return the number of bytes read (never more than XLOG_BLCKSZ), or
80 : : * -1 on failure. The callback shall sleep, if necessary, to wait for the
81 : : * requested bytes to become available, unless the reader's nonblocking flag is set. The callback will not be invoked
82 : : * again for the same page unless more than the returned number of bytes
83 : : * are needed.
84 : : *
85 : : * targetRecPtr is the position of the WAL record we're reading. Usually
86 : : * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
87 : : * to read and verify the page or segment header, before it reads the
88 : : * actual WAL record it's interested in. In that case, targetRecPtr can
89 : : * be used to determine which timeline to read the page from.
90 : : *
91 : : * The callback shall set ->seg.ws_tli to the TLI of the file the page was
92 : : * read from.
93 : : */
94 : : XLogPageReadCB page_read;
95 : :
96 : : /*
97 : : * Callback to open the specified WAL segment for reading. ->seg.ws_file
98 : : * shall be set to the file descriptor of the opened segment. In case of
99 : : * failure, an error shall be raised by the callback and it shall not
100 : : * return.
101 : : *
102 : : * "nextSegNo" is the number of the segment to be opened.
103 : : *
104 : : * "tli_p" is an input/output argument. WALRead() uses it to pass the
105 : : * timeline in which the new segment should be found, but the callback can
106 : : * use it to return the TLI that it actually opened.
107 : : */
108 : : WALSegmentOpenCB segment_open;
109 : :
110 : : /*
111 : : * WAL segment close callback. ->seg.ws_file shall be set to a negative
112 : : * number.
113 : : */
114 : : WALSegmentCloseCB segment_close;
115 : : } XLogReaderRoutine;
116 : :
117 : : #define XL_ROUTINE(...) &(XLogReaderRoutine){__VA_ARGS__}
118 : :
119 : : typedef struct
120 : : {
121 : : /* Is this block ref in use? (tested by XLogRecHasBlockRef()) */
122 : : bool in_use;
123 : :
124 : : /* Identify the block this refers to */
125 : : RelFileLocator rlocator;
126 : : ForkNumber forknum;
127 : : BlockNumber blkno;
128 : :
129 : : /* Prefetching workspace. */
130 : : Buffer prefetch_buffer;
131 : :
132 : : /* copy of the fork_flags field from the XLogRecordBlockHeader */
133 : : uint8 flags;
134 : :
135 : : /* Information on full-page image, if any (the two flags are read via XLogRecHasBlockImage() and XLogRecBlockImageApply()) */
136 : : bool has_image; /* has image, even for consistency checking */
137 : : bool apply_image; /* has image that should be restored */
138 : : char *bkp_image;
139 : : uint16 hole_offset;
140 : : uint16 hole_length;
141 : : uint16 bimg_len;
142 : : uint8 bimg_info;
143 : :
144 : : /* Buffer holding the rmgr-specific data associated with this block (has_data is read via XLogRecHasBlockData()) */
145 : : bool has_data;
146 : : char *data;
147 : : uint16 data_len;
148 : : uint16 data_bufsz;
149 : : } DecodedBkpBlock;
150 : :
151 : : /*
152 : : * The decoded contents of a record. This occupies a contiguous region of
153 : : * memory, with main_data and blocks[n].data pointing to memory after the
154 : : * members declared here.
155 : : */
156 : : typedef struct DecodedXLogRecord
157 : : {
158 : : /* Private members used for resource management. */
159 : : size_t size; /* total size of decoded record */
160 : : bool oversized; /* outside the regular decode buffer? */
161 : : struct DecodedXLogRecord *next; /* decoded record queue link */
162 : :
163 : : /* Public members. */
164 : : XLogRecPtr lsn; /* location */
165 : : XLogRecPtr next_lsn; /* location of next record */
166 : : XLogRecord header; /* header */
167 : : RepOriginId record_origin; /* read via XLogRecGetOrigin() */
168 : : TransactionId toplevel_xid; /* XID of top-level transaction */
169 : : char *main_data; /* record's main data portion */
170 : : uint32 main_data_len; /* main data portion's length */
171 : : int max_block_id; /* highest block_id in use (-1 if none) */
172 : : DecodedBkpBlock blocks[FLEXIBLE_ARRAY_MEMBER];
173 : : } DecodedXLogRecord;
174 : :
175 : : struct XLogReaderState
176 : : {
177 : : /*
178 : : * Operational callbacks
179 : : */
180 : : XLogReaderRoutine routine;
181 : :
182 : : /* ----------------------------------------
183 : : * Public parameters
184 : : * ----------------------------------------
185 : : */
186 : :
187 : : /*
188 : : * System identifier of the xlog files we're about to read. Set to zero
189 : : * (the default value) if unknown or unimportant.
190 : : */
191 : : uint64 system_identifier;
192 : :
193 : : /*
194 : : * Opaque data for callbacks to use. Not used by XLogReader.
195 : : */
196 : : void *private_data;
197 : :
198 : : /*
199 : : * Start and end point of last record read. EndRecPtr is also used as the
200 : : * position to read next. Calling XLogBeginRead() sets EndRecPtr to the
201 : : * starting position and ReadRecPtr to invalid.
202 : : *
203 : : * Start and end point of last record returned by XLogReadRecord(). These
204 : : * are also available as record->lsn and record->next_lsn.
205 : : */
206 : : XLogRecPtr ReadRecPtr; /* start of last record read */
207 : : XLogRecPtr EndRecPtr; /* end+1 of last record read */
208 : :
209 : : /*
210 : : * Set at the end of recovery: the start point of a partial record at the
211 : : * end of WAL (InvalidXLogRecPtr if there wasn't one), and the start
212 : : * location of its first contrecord that went missing.
213 : : */
214 : : XLogRecPtr abortedRecPtr;
215 : : XLogRecPtr missingContrecPtr;
216 : : /* Set when XLP_FIRST_IS_OVERWRITE_CONTRECORD is found */
217 : : XLogRecPtr overwrittenRecPtr;
218 : :
219 : :
220 : : /* ----------------------------------------
221 : : * Decoded representation of current record
222 : : *
223 : : * Use the XLogRec* macros and functions to investigate the record; these fields
224 : : * should not be accessed directly.
225 : : * ----------------------------------------
226 : : * Start and end point of the last record read and decoded by
227 : : * XLogReadRecordInternal(). NextRecPtr is also used as the position to
228 : : * decode next. Calling XLogBeginRead() sets NextRecPtr and EndRecPtr to
229 : : * the requested starting position.
230 : : */
231 : : XLogRecPtr DecodeRecPtr; /* start of last record decoded */
232 : : XLogRecPtr NextRecPtr; /* end+1 of last record decoded */
233 : : XLogRecPtr PrevRecPtr; /* start of previous record decoded */
234 : :
235 : : /* Last record returned by XLogReadRecord(). */
236 : : DecodedXLogRecord *record;
237 : :
238 : : /* ----------------------------------------
239 : : * private/internal state
240 : : * ----------------------------------------
241 : : */
242 : :
243 : : /*
244 : : * Buffer for decoded records. This is a circular buffer, though
245 : : * individual records can't be split in the middle, so some space is often
246 : : * wasted at the end. Oversized records that don't fit in this space are
247 : : * allocated separately.
248 : : */
249 : : char *decode_buffer;
250 : : size_t decode_buffer_size;
251 : : bool free_decode_buffer; /* need to free? */
252 : : char *decode_buffer_head; /* data is read from the head */
253 : : char *decode_buffer_tail; /* new data is written at the tail */
254 : :
255 : : /*
256 : : * Queue of records that have been decoded. This is a linked list that
257 : : * usually consists of consecutive records in decode_buffer, but may also
258 : : * contain oversized records allocated with palloc().
259 : : */
260 : : DecodedXLogRecord *decode_queue_head; /* oldest decoded record */
261 : : DecodedXLogRecord *decode_queue_tail; /* newest decoded record */
262 : :
263 : : /*
264 : : * Buffer for currently read page (XLOG_BLCKSZ bytes, valid up to at least
265 : : * readLen bytes)
266 : : */
267 : : char *readBuf;
268 : : uint32 readLen;
269 : :
270 : : /* last read XLOG position for data currently in readBuf */
271 : : WALSegmentContext segcxt;
272 : : WALOpenSegment seg;
273 : : uint32 segoff;
274 : :
275 : : /*
276 : : * beginning of prior page read, and its TLI. Doesn't necessarily
277 : : * correspond to what's in readBuf; used for timeline sanity checks.
278 : : */
279 : : XLogRecPtr latestPagePtr;
280 : : TimeLineID latestPageTLI;
281 : :
282 : : /* beginning of the WAL record being read. */
283 : : XLogRecPtr currRecPtr;
284 : : /* timeline to read it from, 0 if a lookup is required */
285 : : TimeLineID currTLI;
286 : :
287 : : /*
288 : : * Safe point to read to in currTLI if current TLI is historical
289 : : * (tliSwitchPoint) or InvalidXLogRecPtr if on current timeline.
290 : : *
291 : : * Actually set to the start of the segment containing the timeline switch
292 : : * that ends currTLI's validity, not the LSN of the switch itself, since
293 : : * we can't assume the old segment will be present.
294 : : */
295 : : XLogRecPtr currTLIValidUntil;
296 : :
297 : : /*
298 : : * If currTLI is not the most recent known timeline, the next timeline to
299 : : * read from when currTLIValidUntil is reached.
300 : : */
301 : : TimeLineID nextTLI;
302 : :
303 : : /*
304 : : * Buffer for current ReadRecord result (expandable), used when a record
305 : : * crosses a page boundary.
306 : : */
307 : : char *readRecordBuf;
308 : : uint32 readRecordBufSize;
309 : :
310 : : /* Buffer to hold error message */
311 : : char *errormsg_buf;
312 : : bool errormsg_deferred; /* tested by XLogReaderHasQueuedRecordOrError() */
313 : :
314 : : /*
315 : : * Flag to indicate to XLogPageReadCB that it should not block waiting for
316 : : * data.
317 : : */
318 : : bool nonblocking;
319 : : };
320 : :
321 : : /*
322 : : * Check if XLogNextRecord() has any more queued records or an error to return: true when decode_queue_head is non-NULL or errormsg_deferred is set.
323 : : */
324 : : static inline bool
758 tmunro@postgresql.or 325 :CBC 9398388 : XLogReaderHasQueuedRecordOrError(XLogReaderState *state)
326 : : {
327 [ + + + + ]: 9398388 : return (state->decode_queue_head != NULL) || state->errormsg_deferred;
328 : : }
329 : :
330 : : /* Get a new XLogReader */
331 : : extern XLogReaderState *XLogReaderAllocate(int wal_segment_size,
332 : : const char *waldir,
333 : : XLogReaderRoutine *routine,
334 : : void *private_data);
335 : :
336 : : /* Free an XLogReader */
337 : : extern void XLogReaderFree(XLogReaderState *state);
338 : :
339 : : /* Optionally provide a circular decoding buffer to allow readahead. */
340 : : extern void XLogReaderSetDecodeBuffer(XLogReaderState *state,
341 : : void *buffer,
342 : : size_t size);
343 : :
344 : : /* Position the XLogReader to given record */
345 : : extern void XLogBeginRead(XLogReaderState *state, XLogRecPtr RecPtr);
346 : : extern XLogRecPtr XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr);
347 : :
348 : : /* Return values from XLogPageReadCB. NOTE(review): page_read is documented above as returning a byte count or -1, while the members below describe record reads -- confirm which functions actually return this enum. */
349 : : typedef enum XLogPageReadResult
350 : : {
351 : : XLREAD_SUCCESS = 0, /* record is successfully read */
352 : : XLREAD_FAIL = -1, /* failed during reading a record */
353 : : XLREAD_WOULDBLOCK = -2, /* nonblocking mode only, no data */
354 : : } XLogPageReadResult;
355 : :
356 : : /* Read the next XLog record. Returns NULL on end-of-WAL or failure */
357 : : extern struct XLogRecord *XLogReadRecord(XLogReaderState *state,
358 : : char **errormsg);
359 : :
360 : : /* Consume the next record or error. */
361 : : extern DecodedXLogRecord *XLogNextRecord(XLogReaderState *state,
362 : : char **errormsg);
363 : :
364 : : /* Release the previously returned record, if necessary. */
365 : : extern XLogRecPtr XLogReleasePreviousRecord(XLogReaderState *state);
366 : :
367 : : /* Try to read ahead, if there is data and space. */
368 : : extern DecodedXLogRecord *XLogReadAhead(XLogReaderState *state,
369 : : bool nonblocking);
370 : :
371 : : /* Validate a page */
372 : : extern bool XLogReaderValidatePageHeader(XLogReaderState *state,
373 : : XLogRecPtr recptr, char *phdr);
374 : :
375 : : /* Forget error produced by XLogReaderValidatePageHeader(). */
376 : : extern void XLogReaderResetError(XLogReaderState *state);
377 : :
378 : : /*
379 : : * Error information from WALRead that both backend and frontend callers can
380 : : * process. Currently only errors from pg_pread can be reported.
381 : : */
382 : : typedef struct WALReadError
383 : : {
384 : : int wre_errno; /* errno set by the last pg_pread() */
385 : : int wre_off; /* Offset we tried to read from. */
386 : : int wre_req; /* Bytes requested to be read. */
387 : : int wre_read; /* Bytes read by the last pg_pread(). */
388 : : WALOpenSegment wre_seg; /* Segment we tried to read from. */
389 : : } WALReadError;
390 : :
391 : : extern bool WALRead(XLogReaderState *state,
392 : : char *buf, XLogRecPtr startptr, Size count,
393 : : TimeLineID tli, WALReadError *errinfo);
394 : :
395 : : /* Functions for decoding an XLogRecord */
396 : :
397 : : extern size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len);
398 : : extern bool DecodeXLogRecord(XLogReaderState *state,
399 : : DecodedXLogRecord *decoded,
400 : : XLogRecord *record,
401 : : XLogRecPtr lsn,
402 : : char **errormsg);
403 : :
404 : : /*
405 : : * Macros that provide access to parts of the record most recently returned by
406 : : * XLogReadRecord() or XLogNextRecord(). They expand to plain member accesses on (decoder)->record. XLogRecHasBlockRef() evaluates both of its arguments twice, so avoid arguments with side effects; the other per-block macros index blocks[] without checking block_id against max_block_id.
407 : : */
408 : : #define XLogRecGetTotalLen(decoder) ((decoder)->record->header.xl_tot_len)
409 : : #define XLogRecGetPrev(decoder) ((decoder)->record->header.xl_prev)
410 : : #define XLogRecGetInfo(decoder) ((decoder)->record->header.xl_info)
411 : : #define XLogRecGetRmid(decoder) ((decoder)->record->header.xl_rmid)
412 : : #define XLogRecGetXid(decoder) ((decoder)->record->header.xl_xid)
413 : : #define XLogRecGetOrigin(decoder) ((decoder)->record->record_origin)
414 : : #define XLogRecGetTopXid(decoder) ((decoder)->record->toplevel_xid)
415 : : #define XLogRecGetData(decoder) ((decoder)->record->main_data)
416 : : #define XLogRecGetDataLen(decoder) ((decoder)->record->main_data_len)
417 : : #define XLogRecHasAnyBlockRefs(decoder) ((decoder)->record->max_block_id >= 0)
418 : : #define XLogRecMaxBlockId(decoder) ((decoder)->record->max_block_id)
419 : : #define XLogRecGetBlock(decoder, i) (&(decoder)->record->blocks[(i)])
420 : : #define XLogRecHasBlockRef(decoder, block_id) \
421 : : (((decoder)->record->max_block_id >= (block_id)) && \
422 : : ((decoder)->record->blocks[block_id].in_use))
423 : : #define XLogRecHasBlockImage(decoder, block_id) \
424 : : ((decoder)->record->blocks[block_id].has_image)
425 : : #define XLogRecBlockImageApply(decoder, block_id) \
426 : : ((decoder)->record->blocks[block_id].apply_image)
427 : : #define XLogRecHasBlockData(decoder, block_id) \
428 : : ((decoder)->record->blocks[block_id].has_data)
429 : :
430 : : #ifndef FRONTEND
431 : : extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record);
432 : : #endif
433 : :
434 : : extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page);
435 : : extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len);
436 : : extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
437 : : RelFileLocator *rlocator, ForkNumber *forknum,
438 : : BlockNumber *blknum);
439 : : extern bool XLogRecGetBlockTagExtended(XLogReaderState *record, uint8 block_id,
440 : : RelFileLocator *rlocator, ForkNumber *forknum,
441 : : BlockNumber *blknum,
442 : : Buffer *prefetch_buffer);
443 : :
444 : : #endif /* XLOGREADER_H */
|