Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * bufpage.h
4 : * Standard POSTGRES buffer page definitions.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/include/storage/bufpage.h
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 : #ifndef BUFPAGE_H
15 : #define BUFPAGE_H
16 :
17 : #include "access/xlogdefs.h"
18 : #include "storage/block.h"
19 : #include "storage/item.h"
20 : #include "storage/off.h"
21 :
22 : /*
23 : * A postgres disk page is an abstraction layered on top of a postgres
24 : * disk block (which is simply a unit of i/o, see block.h).
25 : *
26 : * specifically, while a disk block can be unformatted, a postgres
27 : * disk page is always a slotted page of the form:
28 : *
 * +----------------+---------------------------------+
 * | PageHeaderData | linp1 linp2 linp3 ...           |
 * +-----------+----+---------------------------------+
 * | ... linpN |                                      |
 * +-----------+--------------------------------------+
 * |           ^ pd_lower                             |
 * |                                                  |
 * |             v pd_upper                           |
 * +-------------+------------------------------------+
 * |             | tupleN ...                         |
 * +-------------+------------------+-----------------+
 * |       ... tuple3 tuple2 tuple1 | "special space" |
 * +--------------------------------+-----------------+
 *                                  ^ pd_special
43 : *
44 : * a page is full when nothing can be added between pd_lower and
45 : * pd_upper.
46 : *
47 : * all blocks written out by an access method must be disk pages.
48 : *
49 : * EXCEPTIONS:
50 : *
51 : * obviously, a page is not formatted before it is initialized by
52 : * a call to PageInit.
53 : *
54 : * NOTES:
55 : *
56 : * linp1..N form an ItemId (line pointer) array. ItemPointers point
57 : * to a physical block number and a logical offset (line pointer
58 : * number) within that block/page. Note that OffsetNumbers
59 : * conventionally start at 1, not 0.
60 : *
61 : * tuple1..N are added "backwards" on the page. Since an ItemPointer
62 : * offset is used to access an ItemId entry rather than an actual
63 : * byte-offset position, tuples can be physically shuffled on a page
64 : * whenever the need arises. This indirection also keeps crash recovery
65 : * relatively simple, because the low-level details of page space
66 : * management can be controlled by standard buffer page code during
67 : * logging, and during recovery.
68 : *
69 : * AM-generic per-page information is kept in PageHeaderData.
70 : *
71 : * AM-specific per-page data (if any) is kept in the area marked "special
72 : * space"; each AM has an "opaque" structure defined somewhere that is
73 : * stored as the page trailer. an access method should always
74 : * initialize its pages with PageInit and then set its own opaque
75 : * fields.
76 : */
77 :
78 : typedef Pointer Page;
79 :
80 :
81 : /*
82 : * location (byte offset) within a page.
83 : *
84 : * note that this is actually limited to 2^15 because we have limited
85 : * ItemIdData.lp_off and ItemIdData.lp_len to 15 bits (see itemid.h).
86 : */
87 : typedef uint16 LocationIndex;
88 :
89 :
90 : /*
91 : * For historical reasons, the 64-bit LSN value is stored as two 32-bit
92 : * values.
93 : */
94 : typedef struct
95 : {
96 : uint32 xlogid; /* high bits */
97 : uint32 xrecoff; /* low bits */
98 : } PageXLogRecPtr;
99 :
100 : static inline XLogRecPtr
272 peter 101 GNC 34404016 : PageXLogRecPtrGet(PageXLogRecPtr val)
102 : {
103 34404016 : return (uint64) val.xlogid << 32 | val.xrecoff;
104 : }
105 :
/*
 * PageXLogRecPtrSet
 *		Split a 64-bit LSN into the two 32-bit halves of ptr.
 *
 * ptr must be an lvalue of type PageXLogRecPtr.  Both arguments are
 * expanded twice, so avoid passing expressions with side effects.
 */
#define PageXLogRecPtrSet(ptr, lsn) \
	( \
		(ptr).xlogid = (uint32) ((lsn) >> 32), \
		(ptr).xrecoff = (uint32) (lsn) \
	)
108 :
109 : /*
110 : * disk page organization
111 : *
112 : * space management information generic to any page
113 : *
114 : * pd_lsn - identifies xlog record for last change to this page.
115 : * pd_checksum - page checksum, if set.
116 : * pd_flags - flag bits.
117 : * pd_lower - offset to start of free space.
118 : * pd_upper - offset to end of free space.
119 : * pd_special - offset to start of special space.
120 : * pd_pagesize_version - size in bytes and page layout version number.
121 : * pd_prune_xid - oldest XID among potentially prunable tuples on page.
122 : *
123 : * The LSN is used by the buffer manager to enforce the basic rule of WAL:
124 : * "thou shalt write xlog before data". A dirty buffer cannot be dumped
125 : * to disk until xlog has been flushed at least as far as the page's LSN.
126 : *
127 : * pd_checksum stores the page checksum, if it has been set for this page;
128 : * zero is a valid value for a checksum. If a checksum is not in use then
129 : * we leave the field unset. This will typically mean the field is zero
130 : * though non-zero values may also be present if databases have been
131 : * pg_upgraded from releases prior to 9.3, when the same byte offset was
132 : * used to store the current timelineid when the page was last updated.
133 : * Note that there is no indication on a page as to whether the checksum
134 : * is valid or not, a deliberate design choice which avoids the problem
135 : * of relying on the page contents to decide whether to verify it. Hence
136 : * there are no flag bits relating to checksums.
137 : *
138 : * pd_prune_xid is a hint field that helps determine whether pruning will be
139 : * useful. It is currently unused in index pages.
140 : *
141 : * The page version number and page size are packed together into a single
142 : * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
143 : * there was no concept of a page version number, and doing it this way
144 : * lets us pretend that pre-7.3 databases have page version number zero.
145 : * We constrain page sizes to be multiples of 256, leaving the low eight
146 : * bits available for a version number.
147 : *
148 : * Minimum possible page size is perhaps 64B to fit page header, opaque space
149 : * and a minimal tuple; of course, in reality you want it much bigger, so
150 : * the constraint on pagesize mod 256 is not an important restriction.
151 : * On the high end, we can only support pages up to 32KB because lp_off/lp_len
152 : * are 15 bits.
153 : */
154 :
155 : typedef struct PageHeaderData
156 : {
157 : /* XXX LSN is member of *any* block, not only page-organized ones */
158 : PageXLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
159 : * record for last change to this page */
160 : uint16 pd_checksum; /* checksum */
161 : uint16 pd_flags; /* flag bits, see below */
162 : LocationIndex pd_lower; /* offset to start of free space */
163 : LocationIndex pd_upper; /* offset to end of free space */
164 : LocationIndex pd_special; /* offset to start of special space */
165 : uint16 pd_pagesize_version;
166 : TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
167 : ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */
168 : } PageHeaderData;
169 :
170 : typedef PageHeaderData *PageHeader;
171 :
172 : /*
173 : * pd_flags contains the following flag bits. Undefined bits are initialized
174 : * to zero and may be used in the future.
175 : *
176 : * PD_HAS_FREE_LINES is set if there are any LP_UNUSED line pointers before
177 : * pd_lower. This should be considered a hint rather than the truth, since
178 : * changes to it are not WAL-logged.
179 : *
180 : * PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
181 : * page for its new tuple version; this suggests that a prune is needed.
182 : * Again, this is just a hint.
183 : */
#define PD_HAS_FREE_LINES	0x0001	/* hint: some line pointer is unused */
#define PD_PAGE_FULL		0x0002	/* hint: an UPDATE found too little room */
#define PD_ALL_VISIBLE		0x0004	/* every tuple on the page is visible to
									 * everyone */

/* mask covering every pd_flags bit defined above */
#define PD_VALID_FLAG_BITS \
	(PD_HAS_FREE_LINES | PD_PAGE_FULL | PD_ALL_VISIBLE)
190 :
191 : /*
192 : * Page layout version number 0 is for pre-7.3 Postgres releases.
193 : * Releases 7.3 and 7.4 use 1, denoting a new HeapTupleHeader layout.
194 : * Release 8.0 uses 2; it changed the HeapTupleHeader layout again.
195 : * Release 8.1 uses 3; it redefined HeapTupleHeader infomask bits.
196 : * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
197 : * added the pd_flags field (by stealing some bits from pd_tli),
198 : * as well as adding the pd_prune_xid field (which enlarges the header).
199 : *
200 : * As of Release 9.3, the checksum version must also be considered when
201 : * handling pages.
202 : */
#define PG_PAGE_LAYOUT_VERSION		4	/* page layout version number */
#define PG_DATA_CHECKSUM_VERSION	1	/* data checksum version number */
205 :
206 : /* ----------------------------------------------------------------
207 : * page support functions
208 : * ----------------------------------------------------------------
209 : */
210 :
/*
 * Size of the fixed part of the page header: everything that precedes the
 * line pointer array, which is deliberately not counted.
 */
#define SizeOfPageHeaderData	(offsetof(PageHeaderData, pd_linp))
215 :
216 : /*
217 : * PageIsEmpty
218 : * returns true iff no itemid has been allocated on the page
9720 scrappy 219 : */
220 : static inline bool
272 peter 221 GNC 1351662 : PageIsEmpty(Page page)
222 : {
223 1351662 : return ((PageHeader) page)->pd_lower <= SizeOfPageHeaderData;
224 : }
225 :
226 : /*
227 : * PageIsNew
228 : * returns true iff page has not been initialized (by PageInit)
229 : */
230 : static inline bool
231 52293981 : PageIsNew(Page page)
232 : {
233 52293981 : return ((PageHeader) page)->pd_upper == 0;
234 : }
235 :
9720 scrappy 236 ECB : /*
237 : * PageGetItemId
9345 bruce 238 : * Returns an item identifier of a page.
239 : */
240 : static inline ItemId
272 peter 241 GNC 1309694897 : PageGetItemId(Page page, OffsetNumber offsetNumber)
242 : {
243 1309694897 : return &((PageHeader) page)->pd_linp[offsetNumber - 1];
244 : }
245 :
246 : /*
247 : * PageGetContents
248 : * To be used in cases where the page does not contain line pointers.
5383 tgl 249 ECB : *
250 : * Note: prior to 8.3 this was not guaranteed to yield a MAXALIGN'd result.
251 : * Now it is. Beware of old code that might think the offset to the contents
252 : * is just SizeOfPageHeaderData rather than MAXALIGN(SizeOfPageHeaderData).
253 : */
254 : static inline char *
272 peter 255 GNC 39091599 : PageGetContents(Page page)
256 : {
257 39091599 : return (char *) page + MAXALIGN(SizeOfPageHeaderData);
258 : }
259 :
260 : /* ----------------
261 : * functions to access page size info
262 : * ----------------
263 : */
264 :
265 : /*
266 : * PageGetPageSize
267 : * Returns the page size of a page.
268 : *
269 : * this can only be called on a formatted page (unlike
270 : * BufferGetPageSize, which can be called on an unformatted page).
271 : * however, it can be called on a page that is not stored in a buffer.
272 : */
273 : static inline Size
274 58574196 : PageGetPageSize(Page page)
275 : {
276 58574196 : return (Size) (((PageHeader) page)->pd_pagesize_version & (uint16) 0xFF00);
277 : }
278 :
279 : /*
280 : * PageGetPageLayoutVersion
281 : * Returns the page layout version of a page.
9720 scrappy 282 ECB : */
283 : static inline uint8
272 peter 284 GNC 3 : PageGetPageLayoutVersion(Page page)
285 : {
286 3 : return (((PageHeader) page)->pd_pagesize_version & 0x00FF);
287 : }
288 :
289 : /*
290 : * PageSetPageSizeAndVersion
291 : * Sets the page size and page layout version number of a page.
292 : *
293 : * We could support setting these two values separately, but there's
294 : * no real need for it at the moment.
7524 tgl 295 ECB : */
296 : static inline void
272 peter 297 GNC 603792 : PageSetPageSizeAndVersion(Page page, Size size, uint8 version)
298 : {
299 603792 : Assert((size & 0xFF00) == size);
300 : Assert((version & 0x00FF) == version);
301 :
302 603792 : ((PageHeader) page)->pd_pagesize_version = size | version;
303 603792 : }
304 :
305 : /* ----------------
306 : * page special data functions
307 : * ----------------
308 : */
309 : /*
8720 bruce 310 ECB : * PageGetSpecialSize
311 : * Returns size of special space on a page.
9720 scrappy 312 : */
313 : static inline uint16
272 peter 314 GNC 32111566 : PageGetSpecialSize(Page page)
315 : {
316 32111566 : return (PageGetPageSize(page) - ((PageHeader) page)->pd_special);
317 : }
9720 scrappy 318 ECB :
2556 kgrittn 319 : /*
320 : * Using assertions, validate that the page special pointer is OK.
321 : *
322 : * This is intended to catch use of the pointer before page initialization.
323 : */
324 : static inline void
2556 kgrittn 325 GIC 537001033 : PageValidateSpecialPointer(Page page)
2556 kgrittn 326 ECB : {
272 peter 327 GNC 537001033 : Assert(page);
328 537001033 : Assert(((PageHeader) page)->pd_special <= BLCKSZ);
329 537001033 : Assert(((PageHeader) page)->pd_special >= SizeOfPageHeaderData);
2556 kgrittn 330 GIC 537001033 : }
331 :
332 : /*
333 : * PageGetSpecialPointer
334 : * Returns pointer to special space on a page.
9720 scrappy 335 ECB : */
336 : static inline char *
272 peter 337 GNC 537001033 : PageGetSpecialPointer(Page page)
338 : {
339 537001033 : PageValidateSpecialPointer(page);
340 537001033 : return (char *) page + ((PageHeader) page)->pd_special;
341 : }
342 :
343 : /*
344 : * PageGetItem
345 : * Retrieves an item on the given page.
346 : *
347 : * Note:
8492 tgl 348 ECB : * This does not change the status of any of the resources passed.
349 : * The semantics may change in the future.
9359 bruce 350 : */
351 : static inline Item
272 peter 352 GNC 1055024815 : PageGetItem(Page page, ItemId itemId)
353 : {
354 1055024815 : Assert(page);
355 1055024815 : Assert(ItemIdHasStorage(itemId));
356 :
357 1055024815 : return (Item) (((char *) page) + ItemIdGetOffset(itemId));
358 : }
359 :
360 : /*
361 : * PageGetMaxOffsetNumber
362 : * Returns the maximum offset number used by the given page.
363 : * Since offset numbers are 1-based, this is also the number
364 : * of items on the page.
9116 bruce 365 ECB : *
366 : * NOTE: if the page is not initialized (pd_lower == 0), we must
367 : * return zero to ensure sane behavior.
368 : */
369 : static inline OffsetNumber
272 peter 370 GNC 379804800 : PageGetMaxOffsetNumber(Page page)
371 : {
372 379804800 : PageHeader pageheader = (PageHeader) page;
373 :
374 379804800 : if (pageheader->pd_lower <= SizeOfPageHeaderData)
375 716104 : return 0;
376 : else
377 379088696 : return (pageheader->pd_lower - SizeOfPageHeaderData) / sizeof(ItemIdData);
378 : }
379 :
380 : /*
381 : * Additional functions for access to page headers.
382 : */
383 : static inline XLogRecPtr
384 33375036 : PageGetLSN(Page page)
385 : {
386 33375036 : return PageXLogRecPtrGet(((PageHeader) page)->pd_lsn);
387 : }
388 : static inline void
389 26414018 : PageSetLSN(Page page, XLogRecPtr lsn)
390 : {
391 26414018 : PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn);
392 26414018 : }
272 peter 393 ECB :
394 : static inline bool
272 peter 395 GNC 16708626 : PageHasFreeLinePointers(Page page)
396 : {
397 16708626 : return ((PageHeader) page)->pd_flags & PD_HAS_FREE_LINES;
398 : }
399 : static inline void
400 47744 : PageSetHasFreeLinePointers(Page page)
401 : {
402 47744 : ((PageHeader) page)->pd_flags |= PD_HAS_FREE_LINES;
403 47744 : }
404 : static inline void
405 108635 : PageClearHasFreeLinePointers(Page page)
406 : {
407 108635 : ((PageHeader) page)->pd_flags &= ~PD_HAS_FREE_LINES;
408 108635 : }
272 peter 409 ECB :
410 : static inline bool
272 peter 411 GNC 1390944 : PageIsFull(Page page)
412 : {
413 1390944 : return ((PageHeader) page)->pd_flags & PD_PAGE_FULL;
414 : }
415 : static inline void
416 196178 : PageSetFull(Page page)
417 : {
418 196178 : ((PageHeader) page)->pd_flags |= PD_PAGE_FULL;
419 196178 : }
420 : static inline void
421 113033 : PageClearFull(Page page)
422 : {
423 113033 : ((PageHeader) page)->pd_flags &= ~PD_PAGE_FULL;
424 113033 : }
5240 heikki.linnakangas 425 ECB :
426 : static inline bool
272 peter 427 GNC 49602230 : PageIsAllVisible(Page page)
428 : {
429 49602230 : return ((PageHeader) page)->pd_flags & PD_ALL_VISIBLE;
430 : }
431 : static inline void
432 159423 : PageSetAllVisible(Page page)
433 : {
434 159423 : ((PageHeader) page)->pd_flags |= PD_ALL_VISIBLE;
435 159423 : }
436 : static inline void
437 13435 : PageClearAllVisible(Page page)
438 : {
439 13435 : ((PageHeader) page)->pd_flags &= ~PD_ALL_VISIBLE;
440 13435 : }
272 peter 441 ECB :
442 : /*
443 : * These two require "access/transam.h", so left as macros.
444 : */
/*
 * PageSetPrunable
 *		Record xid in the page's pd_prune_xid field when that field holds
 *		no valid XID yet, or when xid precedes the XID already stored.
 *
 * xid must be a normal transaction ID (asserted).  Both arguments are
 * copied into block-local variables so that each one is evaluated exactly
 * once, even if the caller passes an expression with side effects.
 */
#define PageSetPrunable(page, xid) \
do { \
	PageHeader	prunable_hdr_ = (PageHeader) (page); \
	TransactionId prunable_xid_ = (xid); \
	Assert(TransactionIdIsNormal(prunable_xid_)); \
	if (!TransactionIdIsValid(prunable_hdr_->pd_prune_xid) || \
		TransactionIdPrecedes(prunable_xid_, prunable_hdr_->pd_prune_xid)) \
		prunable_hdr_->pd_prune_xid = prunable_xid_; \
} while (0)
/* Reset the page's pd_prune_xid field to InvalidTransactionId. */
#define PageClearPrunable(page) \
	( \
		((PageHeader) (page))->pd_prune_xid = InvalidTransactionId \
	)
454 :
8249 vadim4o 455 :
9720 scrappy 456 : /* ----------------------------------------------------------------
457 : * extern declarations
458 : * ----------------------------------------------------------------
459 : */
895 michael 460 :
/* option bits for the "flags" argument of PageAddItemExtended() */
#define PAI_OVERWRITE	0x0001
#define PAI_IS_HEAP		0x0002
9720 scrappy 464 :
/* option bits for the "flags" argument of PageIsVerifiedExtended() */
#define PIV_LOG_WARNING	0x0001
#define PIV_REPORT_STAT	0x0002
468 :
/*
 * PageAddItem
 *		Convenience wrapper around PageAddItemExtended() that turns the two
 *		boolean arguments into the corresponding PAI_* flag bits.
 */
#define PageAddItem(page, item, size, offsetNumber, overwrite, is_heap) \
	PageAddItemExtended((page), (item), (size), (offsetNumber), \
						((overwrite) ? PAI_OVERWRITE : 0) | \
						((is_heap) ? PAI_IS_HEAP : 0))
473 :
/*
 * PageIsVerified
 *		Convenience wrapper around PageIsVerifiedExtended() that passes both
 *		PIV_LOG_WARNING and PIV_REPORT_STAT.
 */
#define PageIsVerified(page, blkno) \
	PageIsVerifiedExtended((page), (blkno), \
						   PIV_LOG_WARNING | PIV_REPORT_STAT)
477 :
478 : /*
479 : * Check that BLCKSZ is a multiple of sizeof(size_t). In
895 michael 480 : * PageIsVerifiedExtended(), it is much faster to check if a page is
481 : * full of zeroes using the native word size. Note that this assertion
482 : * is kept within a header to make sure that StaticAssertDecl() works
483 : * across various combinations of platforms and compilers.
484 : */
1161 485 : StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)),
486 : "BLCKSZ has to be a multiple of sizeof(size_t)");
487 :
9344 bruce 488 : extern void PageInit(Page page, Size pageSize, Size specialSize);
489 : extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags);
2505 alvherre 490 : extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size,
491 : OffsetNumber offsetNumber, int flags);
5270 tgl 492 : extern Page PageGetTempPage(Page page);
493 : extern Page PageGetTempPageCopy(Page page);
494 : extern Page PageGetTempPageCopySpecial(Page page);
495 : extern void PageRestoreTempPage(Page tempPage, Page oldPage);
496 : extern void PageRepairFragmentation(Page page);
497 : extern void PageTruncateLinePointerArray(Page page);
498 : extern Size PageGetFreeSpace(Page page);
499 : extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
500 : extern Size PageGetExactFreeSpace(Page page);
501 : extern Size PageGetHeapFreeSpace(Page page);
502 : extern void PageIndexTupleDelete(Page page, OffsetNumber offnum);
503 : extern void PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems);
504 : extern void PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum);
505 : extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
506 : Item newtup, Size newsize);
507 : extern char *PageSetChecksumCopy(Page page, BlockNumber blkno);
508 : extern void PageSetChecksumInplace(Page page, BlockNumber blkno);
509 :
510 : #endif /* BUFPAGE_H */
|