Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * bufpage.c
4 : : * POSTGRES standard buffer page code.
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/storage/page/bufpage.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/htup_details.h"
18 : : #include "access/itup.h"
19 : : #include "access/xlog.h"
20 : : #include "pgstat.h"
21 : : #include "storage/checksum.h"
22 : : #include "utils/memdebug.h"
23 : : #include "utils/memutils.h"
24 : :
25 : :
/*
 * GUC variable.
 *
 * When true, a checksum mismatch detected by PageIsVerifiedExtended() is
 * reported (WARNING and/or stats, per its flags) but the page is accepted
 * anyway, provided its header still looks sane.
 */
bool		ignore_checksum_failure = false;
28 : :
29 : :
30 : : /* ----------------------------------------------------------------
31 : : * Page support functions
32 : : * ----------------------------------------------------------------
33 : : */
34 : :
35 : : /*
36 : : * PageInit
37 : : * Initializes the contents of a page.
38 : : * Note that we don't calculate an initial checksum here; that's not done
39 : : * until it's time to write.
40 : : */
41 : : void
10141 scrappy@hub.org 42 :CBC 317495 : PageInit(Page page, Size pageSize, Size specialSize)
43 : : {
9715 bruce@momjian.us 44 : 317495 : PageHeader p = (PageHeader) page;
45 : :
8209 tgl@sss.pgh.pa.us 46 : 317495 : specialSize = MAXALIGN(specialSize);
47 : :
9716 bruce@momjian.us 48 [ - + ]: 317495 : Assert(pageSize == BLCKSZ);
7957 49 [ - + ]: 317495 : Assert(pageSize > specialSize + SizeOfPageHeaderData);
50 : :
51 : : /* Make sure all fields of page are zero, as well as unused space */
8125 tgl@sss.pgh.pa.us 52 [ + - + - : 317495 : MemSet(p, 0, pageSize);
+ - - + -
- ]
53 : :
4041 simon@2ndQuadrant.co 54 : 317495 : p->pd_flags = 0;
7957 bruce@momjian.us 55 : 317495 : p->pd_lower = SizeOfPageHeaderData;
9716 56 : 317495 : p->pd_upper = pageSize - specialSize;
57 : 317495 : p->pd_special = pageSize - specialSize;
7895 tgl@sss.pgh.pa.us 58 : 317495 : PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
59 : : /* p->pd_prune_xid = InvalidTransactionId; done by above MemSet */
8506 vadim4o@yahoo.com 60 : 317495 : }
61 : :
62 : :
/*
 * PageIsVerifiedExtended
 *		Check that the page header and checksum (if any) appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus line
 * pointers, testing invalid transaction identifiers, etc.
 *
 * It turns out to be necessary to allow zeroed pages here too.  Even though
 * this routine is *not* called when deliberately adding a page to a relation,
 * there are scenarios in which a zeroed page might be found in a table.
 * (Example: a backend extends a relation, then crashes before it can write
 * any WAL entry about the new page.  The kernel will already have the
 * zeroed page in the file, and it will stay that way after restart.)  So we
 * allow zeroed pages here, and are careful that the page access macros
 * treat such a page as empty and without free space.  Eventually, VACUUM
 * will clean up such a page and make it usable.
 *
 * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
 * a checksum failure.
 *
 * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
 * to pgstat.
 *
 * Returns true if the page is acceptable for entry into the buffer pool.
 */
bool
PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
{
	PageHeader	p = (PageHeader) page;
	size_t	   *pagebytes;
	int			i;
	bool		checksum_failure = false;
	bool		header_sane = false;
	bool		all_zeroes = false;
	uint16		checksum = 0;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		if (DataChecksumsEnabled())
		{
			checksum = pg_checksum_page((char *) page, blkno);

			if (checksum != p->pd_checksum)
				checksum_failure = true;
		}

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool.  Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		/* Common case: header looks fine and checksum (if enabled) matched */
		if (header_sane && !checksum_failure)
			return true;
	}

	/*
	 * Check all-zeroes case.  Scanned a word (size_t) at a time; BLCKSZ is
	 * always a multiple of sizeof(size_t), so no tail bytes are missed.
	 */
	all_zeroes = true;
	pagebytes = (size_t *) page;
	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
	{
		if (pagebytes[i] != 0)
		{
			all_zeroes = false;
			break;
		}
	}

	if (all_zeroes)
		return true;

	/*
	 * Throw a WARNING if the checksum fails, but only after we've checked for
	 * the all-zeroes case.
	 */
	if (checksum_failure)
	{
		if ((flags & PIV_LOG_WARNING) != 0)
			ereport(WARNING,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("page verification failed, calculated checksum %u but expected %u",
							checksum, p->pd_checksum)));

		if ((flags & PIV_REPORT_STAT) != 0)
			pgstat_report_checksum_failure();

		/*
		 * With ignore_checksum_failure set, accept the page anyway as long
		 * as the header passed the sanity checks above.
		 */
		if (header_sane && ignore_checksum_failure)
			return true;
	}

	return false;
}
164 : :
165 : :
/*
 * PageAddItemExtended
 *
 * Add an item to a page.  Return value is the offset at which it was
 * inserted, or InvalidOffsetNumber if the item is not inserted for any
 * reason.  A WARNING is issued indicating the reason for the refusal.
 *
 * offsetNumber must be either InvalidOffsetNumber to specify finding a
 * free line pointer, or a value between FirstOffsetNumber and one past
 * the last existing item, to specify using that particular line pointer.
 *
 * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
 * the item at the specified offsetNumber, which must be either a
 * currently-unused line pointer, or one past the last existing item.
 *
 * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
 * the item at the specified offsetNumber, moving existing items later
 * in the array to make room.
 *
 * If offsetNumber is not valid, then assign a slot by finding the first
 * one that is both unused and deallocated.
 *
 * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
 * MaxHeapTuplesPerPage line pointers on the page.
 *
 * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
 */
OffsetNumber
PageAddItemExtended(Page page,
					Item item,
					Size size,
					OffsetNumber offsetNumber,
					int flags)
{
	PageHeader	phdr = (PageHeader) page;
	Size		alignedSize;
	int			lower;
	int			upper;
	ItemId		itemId;
	OffsetNumber limit;
	bool		needshuffle = false;

	/*
	 * Be wary about corrupted page pointers.  PANIC (not ERROR) because this
	 * may be called in critical sections where ereport(ERROR) is disallowed.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ)
		ereport(PANIC,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	/*
	 * Select offsetNumber to place the new item at
	 */
	limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));

	/* was offsetNumber passed in? */
	if (OffsetNumberIsValid(offsetNumber))
	{
		/* yes, check it */
		if ((flags & PAI_OVERWRITE) != 0)
		{
			if (offsetNumber < limit)
			{
				/* Overwrite is only legal onto an unused line pointer */
				itemId = PageGetItemId(page, offsetNumber);
				if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
				{
					elog(WARNING, "will not overwrite a used ItemId");
					return InvalidOffsetNumber;
				}
			}
		}
		else
		{
			if (offsetNumber < limit)
				needshuffle = true; /* need to move existing linp's */
		}
	}
	else
	{
		/* offsetNumber was not passed in, so find a free slot */
		/* if no free slot, we'll put it at limit (1st open slot) */
		if (PageHasFreeLinePointers(page))
		{
			/*
			 * Scan line pointer array to locate a "recyclable" (unused)
			 * ItemId.
			 *
			 * Always use earlier items first.  PageTruncateLinePointerArray
			 * can only truncate unused items when they appear as a contiguous
			 * group at the end of the line pointer array.
			 */
			for (offsetNumber = FirstOffsetNumber;
				 offsetNumber < limit;	/* limit is maxoff+1 */
				 offsetNumber++)
			{
				itemId = PageGetItemId(page, offsetNumber);

				/*
				 * We check for no storage as well, just to be paranoid;
				 * unused items should never have storage.  Assert() that the
				 * invariant is respected too.
				 */
				Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));

				if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
					break;
			}
			if (offsetNumber >= limit)
			{
				/* the hint is wrong, so reset it */
				PageClearHasFreeLinePointers(page);
			}
		}
		else
		{
			/* don't bother searching if hint says there's no free slot */
			offsetNumber = limit;
		}
	}

	/* Reject placing items beyond the first unused line pointer */
	if (offsetNumber > limit)
	{
		elog(WARNING, "specified item offset is too large");
		return InvalidOffsetNumber;
	}

	/* Reject placing items beyond heap boundary, if heap */
	if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
	{
		elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
		return InvalidOffsetNumber;
	}

	/*
	 * Compute new lower and upper pointers for page, see if it'll fit.
	 *
	 * Note: do arithmetic as signed ints, to avoid mistakes if, say,
	 * alignedSize > pd_upper.
	 */
	if (offsetNumber == limit || needshuffle)
		lower = phdr->pd_lower + sizeof(ItemIdData);	/* array grows by one */
	else
		lower = phdr->pd_lower; /* recycling an existing line pointer */

	alignedSize = MAXALIGN(size);

	upper = (int) phdr->pd_upper - (int) alignedSize;

	/* out of free space: caller gets InvalidOffsetNumber, no WARNING here */
	if (lower > upper)
		return InvalidOffsetNumber;

	/*
	 * OK to insert the item.  First, shuffle the existing pointers if needed.
	 */
	itemId = PageGetItemId(page, offsetNumber);

	if (needshuffle)
		memmove(itemId + 1, itemId,
				(limit - offsetNumber) * sizeof(ItemIdData));

	/* set the line pointer */
	ItemIdSetNormal(itemId, upper, size);

	/*
	 * Items normally contain no uninitialized bytes.  Core bufpage consumers
	 * conform, but this is not a necessary coding rule; a new index AM could
	 * opt to depart from it.  However, data type input functions and other
	 * C-language functions that synthesize datums should initialize all
	 * bytes; datumIsEqual() relies on this.  Testing here, along with the
	 * similar check in printtup(), helps to catch such mistakes.
	 *
	 * Values of the "name" type retrieved via index-only scans may contain
	 * uninitialized bytes; see comment in btrescan().  Valgrind will report
	 * this as an error, but it is safe to ignore.
	 */
	VALGRIND_CHECK_MEM_IS_DEFINED(item, size);

	/* copy the item's data onto the page */
	memcpy((char *) page + upper, item, size);

	/* adjust page header */
	phdr->pd_lower = (LocationIndex) lower;
	phdr->pd_upper = (LocationIndex) upper;

	return offsetNumber;
}
357 : :
358 : :
359 : : /*
360 : : * PageGetTempPage
361 : : * Get a temporary page in local memory for special processing.
362 : : * The returned page is not initialized at all; caller must do that.
363 : : */
364 : : Page
5641 tgl@sss.pgh.pa.us 365 : 10706 : PageGetTempPage(Page page)
366 : : {
367 : : Size pageSize;
368 : : Page temp;
369 : :
370 : 10706 : pageSize = PageGetPageSize(page);
371 : 10706 : temp = (Page) palloc(pageSize);
372 : :
373 : 10706 : return temp;
374 : : }
375 : :
376 : : /*
377 : : * PageGetTempPageCopy
378 : : * Get a temporary page in local memory for special processing.
379 : : * The page is initialized by copying the contents of the given page.
380 : : */
381 : : Page
382 : 5638 : PageGetTempPageCopy(Page page)
383 : : {
384 : : Size pageSize;
385 : : Page temp;
386 : :
9716 bruce@momjian.us 387 : 5638 : pageSize = PageGetPageSize(page);
8668 tgl@sss.pgh.pa.us 388 : 5638 : temp = (Page) palloc(pageSize);
389 : :
8209 390 : 5638 : memcpy(temp, page, pageSize);
391 : :
5641 392 : 5638 : return temp;
393 : : }
394 : :
395 : : /*
396 : : * PageGetTempPageCopySpecial
397 : : * Get a temporary page in local memory for special processing.
398 : : * The page is PageInit'd with the same special-space size as the
399 : : * given page, and the special space is copied from the given page.
400 : : */
401 : : Page
402 : 28058 : PageGetTempPageCopySpecial(Page page)
403 : : {
404 : : Size pageSize;
405 : : Page temp;
406 : :
407 : 28058 : pageSize = PageGetPageSize(page);
408 : 28058 : temp = (Page) palloc(pageSize);
409 : :
410 : 28058 : PageInit(temp, pageSize, PageGetSpecialSize(page));
411 : 28058 : memcpy(PageGetSpecialPointer(temp),
412 : 28058 : PageGetSpecialPointer(page),
413 : 28058 : PageGetSpecialSize(page));
414 : :
9357 bruce@momjian.us 415 : 28058 : return temp;
416 : : }
417 : :
418 : : /*
419 : : * PageRestoreTempPage
420 : : * Copy temporary page back to permanent page after special processing
421 : : * and release the temporary page.
422 : : */
423 : : void
10141 scrappy@hub.org 424 : 37572 : PageRestoreTempPage(Page tempPage, Page oldPage)
425 : : {
426 : : Size pageSize;
427 : :
9716 bruce@momjian.us 428 : 37572 : pageSize = PageGetPageSize(tempPage);
7922 tgl@sss.pgh.pa.us 429 : 37572 : memcpy((char *) oldPage, (char *) tempPage, pageSize);
430 : :
9716 bruce@momjian.us 431 : 37572 : pfree(tempPage);
10141 scrappy@hub.org 432 : 37572 : }
433 : :
/*
 * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
 *
 * One entry per line pointer that has storage; compactify_tuples() consumes
 * an array of these to relocate the corresponding tuples.
 */
typedef struct itemIdCompactData
{
	uint16		offsetindex;	/* linp array index (0-based) */
	int16		itemoff;		/* page offset of item data */
	uint16		alignedlen;		/* MAXALIGN(item data len) */
} itemIdCompactData;
typedef itemIdCompactData *itemIdCompact;
444 : :
/*
 * After removing or marking some line pointers unused, move the tuples to
 * remove the gaps caused by the removed items and reorder them back into
 * reverse line pointer order in the page.
 *
 * This function can often be fairly hot, so it pays to take some measures to
 * make it as optimal as possible.
 *
 * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
 * descending order of itemoff.  When this is true we can just memmove()
 * tuples towards the end of the page.  This is quite a common case as it's
 * the order that tuples are initially inserted into pages.  When we call this
 * function to defragment the tuples in the page then any new line pointers
 * added to the page will keep that presorted order, so hitting this case is
 * still very common for tables that are commonly updated.
 *
 * When the 'itemidbase' array is not presorted then we're unable to just
 * memmove() tuples around freely.  Doing so could cause us to overwrite the
 * memory belonging to a tuple we've not moved yet.  In this case, we copy all
 * the tuples that need to be moved into a temporary buffer.  We can then
 * simply memcpy() out of that temp buffer back into the page at the correct
 * location.  Tuples are copied back into the page in the same order as the
 * 'itemidbase' array, so we end up reordering the tuples back into reverse
 * line pointer order.  This will increase the chances of hitting the
 * presorted case the next time around.
 *
 * Callers must ensure that nitems is > 0
 */
static void
compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		upper;			/* target offset for the next tuple to move */
	Offset		copy_tail;		/* end of the pending run of tuples to move */
	Offset		copy_head;		/* start of the pending run of tuples to move */
	itemIdCompact itemidptr;
	int			i;

	/* Code within will not work correctly if nitems == 0 */
	Assert(nitems > 0);

	if (presorted)
	{

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * Verify we've not gotten any new callers that are incorrectly
			 * passing a true presorted value.
			 */
			Offset		lastoff = phdr->pd_special;

			for (i = 0; i < nitems; i++)
			{
				itemidptr = &itemidbase[i];

				Assert(lastoff > itemidptr->itemoff);

				lastoff = itemidptr->itemoff;
			}
		}
#endif							/* USE_ASSERT_CHECKING */

		/*
		 * 'itemidbase' is already in the optimal order, i.e, lower item
		 * pointers have a higher offset.  This allows us to memmove() the
		 * tuples up to the end of the page without having to worry about
		 * overwriting other tuples that have not been moved yet.
		 *
		 * There's a good chance that there are tuples already right at the
		 * end of the page that we can simply skip over because they're
		 * already in the correct location within the page.  We'll do that
		 * first...
		 */
		upper = phdr->pd_special;
		i = 0;
		do
		{
			itemidptr = &itemidbase[i];
			if (upper != itemidptr->itemoff + itemidptr->alignedlen)
				break;			/* first tuple that is out of place */
			upper -= itemidptr->alignedlen;

			i++;
		} while (i < nitems);

		/*
		 * Now that we've found the first tuple that needs to be moved, we can
		 * do the tuple compactification.  We try and make the least number of
		 * memmove() calls and only call memmove() when there's a gap.  When
		 * we see a gap we just move all tuples after the gap up until the
		 * point of the last move operation.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				/* gap found: flush the accumulated run in one memmove() */
				memmove((char *) page + upper,
						page + copy_head,
						copy_tail - copy_head);

				/*
				 * We've now moved all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple so it can be moved in another iteration of the loop.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* move the remaining tuples. */
		memmove((char *) page + upper,
				page + copy_head,
				copy_tail - copy_head);
	}
	else
	{
		PGAlignedBlock scratch;
		char	   *scratchptr = scratch.data;

		/*
		 * Non-presorted case: The tuples in the itemidbase array may be in
		 * any order.  So, in order to move these to the end of the page we
		 * must make a temp copy of each tuple that needs to be moved before
		 * we copy them back into the page at the new offset.
		 *
		 * If a large percentage of tuples have been pruned (>75%) then we'll
		 * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
		 * just do a single memcpy() for all tuples that need to be moved.
		 * When so many tuples have been removed there's likely to be a lot of
		 * gaps and it's unlikely that many non-movable tuples remain at the
		 * end of the page.
		 */
		if (nitems < PageGetMaxOffsetNumber(page) / 4)
		{
			/* sparse page: stage each surviving tuple individually */
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
					   itemidptr->alignedlen);
				i++;
			} while (i < nitems);

			/* Set things up for the compactification code below */
			i = 0;
			itemidptr = &itemidbase[0];
			upper = phdr->pd_special;
		}
		else
		{
			upper = phdr->pd_special;

			/*
			 * Many tuples are likely to already be in the correct location.
			 * There's no need to copy these into the temp buffer.  Instead
			 * we'll just skip forward in the itemidbase array to the position
			 * that we do need to move tuples from so that the code below just
			 * leaves these ones alone.
			 */
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				if (upper != itemidptr->itemoff + itemidptr->alignedlen)
					break;
				upper -= itemidptr->alignedlen;

				i++;
			} while (i < nitems);

			/* Copy all tuples that need to be moved into the temp buffer */
			memcpy(scratchptr + phdr->pd_upper,
				   page + phdr->pd_upper,
				   upper - phdr->pd_upper);
		}

		/*
		 * Do the tuple compactification.  itemidptr is already pointing to
		 * the first tuple that we're going to move.  Here we collapse the
		 * memcpy calls for adjacent tuples into a single call.  This is done
		 * by delaying the memcpy call until we find a gap that needs to be
		 * closed.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			/* copy pending tuples when we detect a gap */
			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				memcpy((char *) page + upper,
					   scratchptr + copy_head,
					   copy_tail - copy_head);

				/*
				 * We've now copied all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* Copy the remaining chunk */
		memcpy((char *) page + upper,
			   scratchptr + copy_head,
			   copy_tail - copy_head);
	}

	/* all tuple data now lives in [upper, pd_special) */
	phdr->pd_upper = upper;
}
681 : :
/*
 * PageRepairFragmentation
 *
 * Frees fragmented space on a heap page following pruning.
 *
 * This routine is usable for heap pages only, but see PageIndexMultiDelete.
 *
 * This routine removes unused line pointers from the end of the line pointer
 * array.  This is possible when dead heap-only tuples get removed by pruning,
 * especially when there were HOT chains with several tuples each beforehand.
 *
 * Caller had better have a full cleanup lock on page's buffer.  As a side
 * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
 * needed.  Caller might also need to account for a reduction in the length of
 * the line pointer array following array truncation.
 */
void
PageRepairFragmentation(Page page)
{
	Offset		pd_lower = ((PageHeader) page)->pd_lower;
	Offset		pd_upper = ((PageHeader) page)->pd_upper;
	Offset		pd_special = ((PageHeader) page)->pd_special;
	Offset		last_offset;
	itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nstorage,
				nunused;
	OffsetNumber finalusedlp = InvalidOffsetNumber;
	int			i;
	Size		totallen;
	bool		presorted = true;	/* For now */

	/*
	 * It's worth the trouble to be more paranoid here than in most places,
	 * because we are about to reshuffle data in (what is usually) a shared
	 * disk buffer.  If we aren't careful then corrupted pointers, lengths,
	 * etc could cause us to clobber adjacent disk buffers, spreading the data
	 * loss further.  So, check everything.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Run through the line pointer array and collect data about live items.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	nunused = totallen = 0;
	last_offset = pd_special;
	for (i = FirstOffsetNumber; i <= nline; i++)
	{
		lp = PageGetItemId(page, i);
		if (ItemIdIsUsed(lp))
		{
			if (ItemIdHasStorage(lp))
			{
				itemidptr->offsetindex = i - 1;
				itemidptr->itemoff = ItemIdGetOffset(lp);

				/*
				 * Track whether item offsets are strictly descending, so
				 * compactify_tuples can use its cheaper memmove() path.
				 */
				if (last_offset > itemidptr->itemoff)
					last_offset = itemidptr->itemoff;
				else
					presorted = false;

				if (unlikely(itemidptr->itemoff < (int) pd_upper ||
							 itemidptr->itemoff >= (int) pd_special))
					ereport(ERROR,
							(errcode(ERRCODE_DATA_CORRUPTED),
							 errmsg("corrupted line pointer: %u",
									itemidptr->itemoff)));
				itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
				totallen += itemidptr->alignedlen;
				itemidptr++;
			}

			finalusedlp = i;	/* Could be the final non-LP_UNUSED item */
		}
		else
		{
			/* Unused entries should have lp_len = 0, but make sure */
			Assert(!ItemIdHasStorage(lp));
			ItemIdSetUnused(lp);
			nunused++;
		}
	}

	nstorage = itemidptr - itemidbase;
	if (nstorage == 0)
	{
		/* Page is completely empty, so just reset it quickly */
		((PageHeader) page)->pd_upper = pd_special;
	}
	else
	{
		/* Need to compact the page the hard way */
		if (totallen > (Size) (pd_special - pd_lower))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted item lengths: total %u, available space %u",
							(unsigned int) totallen, pd_special - pd_lower)));

		compactify_tuples(itemidbase, nstorage, page, presorted);
	}

	if (finalusedlp != nline)
	{
		/* The last line pointer is not the last used line pointer */
		int			nunusedend = nline - finalusedlp;

		Assert(nunused >= nunusedend && nunusedend > 0);

		/* remove trailing unused line pointers from the count */
		nunused -= nunusedend;
		/* truncate the line pointer array */
		((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
	}

	/* Set hint bit for PageAddItemExtended */
	if (nunused > 0)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
814 : :
/*
 * PageTruncateLinePointerArray
 *
 * Removes unused line pointers at the end of the line pointer array.
 *
 * This routine is usable for heap pages only.  It is called by VACUUM during
 * its second pass over the heap.  We expect at least one LP_UNUSED line
 * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
 * it just set to LP_UNUSED then it should not call here).
 *
 * We avoid truncating the line pointer array to 0 items, if necessary by
 * leaving behind a single remaining LP_UNUSED item.  This is a little
 * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
 * page behind.
 *
 * Caller can have either an exclusive lock or a full cleanup lock on page's
 * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
 * on whether or not we leave behind any remaining LP_UNUSED items.
 */
void
PageTruncateLinePointerArray(Page page)
{
	PageHeader	phdr = (PageHeader) page;
	bool		countdone = false,	/* done counting truncatable items? */
				sethint = false;	/* any LP_UNUSED items left behind? */
	int			nunusedend = 0;		/* number of trailing items to truncate */

	/* Scan line pointer array back-to-front */
	for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
	{
		ItemId		lp = PageGetItemId(page, i);

		/*
		 * The "i > FirstOffsetNumber" part of this test is what guarantees
		 * we never truncate the array down to zero items: the first line
		 * pointer is never counted as truncatable, even if it is LP_UNUSED
		 * (in which case it falls into the else branch and sets the hint).
		 */
		if (!countdone && i > FirstOffsetNumber)
		{
			/*
			 * Still determining which line pointers from the end of the array
			 * will be truncated away.  Either count another line pointer as
			 * safe to truncate, or notice that it's not safe to truncate
			 * additional line pointers (stop counting line pointers).
			 */
			if (!ItemIdIsUsed(lp))
				nunusedend++;
			else
				countdone = true;
		}
		else
		{
			/*
			 * Once we've stopped counting we still need to figure out if
			 * there are any remaining LP_UNUSED line pointers somewhere more
			 * towards the front of the array.
			 */
			if (!ItemIdIsUsed(lp))
			{
				/*
				 * This is an unused line pointer that we won't be truncating
				 * away -- so there is at least one.  Set hint on page.
				 */
				sethint = true;
				break;
			}
		}
	}

	if (nunusedend > 0)
	{
		/* Truncate: pull pd_lower back over the trailing unused items */
		phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;

#ifdef CLOBBER_FREED_MEMORY
		/* Overwrite the reclaimed array space to catch stray references */
		memset((char *) page + phdr->pd_lower, 0x7F,
			   sizeof(ItemIdData) * nunusedend);
#endif
	}
	else
		Assert(sethint);		/* caller promised at least one LP_UNUSED */

	/* Set hint bit for PageAddItemExtended */
	if (sethint)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
897 : :
898 : : /*
899 : : * PageGetFreeSpace
900 : : * Returns the size of the free (allocatable) space on a page,
901 : : * reduced by the space needed for a new line pointer.
902 : : *
903 : : * Note: this should usually only be used on index pages. Use
904 : : * PageGetHeapFreeSpace on heap pages.
905 : : */
906 : : Size
10141 scrappy@hub.org 907 : 26934591 : PageGetFreeSpace(Page page)
908 : : {
909 : : int space;
910 : :
911 : : /*
912 : : * Use signed arithmetic here so that we behave sensibly if pd_lower >
913 : : * pd_upper.
914 : : */
8125 tgl@sss.pgh.pa.us 915 : 26934591 : space = (int) ((PageHeader) page)->pd_upper -
916 : 26934591 : (int) ((PageHeader) page)->pd_lower;
917 : :
918 [ + + ]: 26934591 : if (space < (int) sizeof(ItemIdData))
9357 bruce@momjian.us 919 : 5555 : return 0;
6262 920 : 26929036 : space -= sizeof(ItemIdData);
921 : :
922 : 26929036 : return (Size) space;
923 : : }
924 : :
925 : : /*
926 : : * PageGetFreeSpaceForMultipleTuples
927 : : * Returns the size of the free (allocatable) space on a page,
928 : : * reduced by the space needed for multiple new line pointers.
929 : : *
930 : : * Note: this should usually only be used on index pages. Use
931 : : * PageGetHeapFreeSpace on heap pages.
932 : : */
933 : : Size
2603 rhaas@postgresql.org 934 : 65652 : PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
935 : : {
936 : : int space;
937 : :
938 : : /*
939 : : * Use signed arithmetic here so that we behave sensibly if pd_lower >
940 : : * pd_upper.
941 : : */
942 : 65652 : space = (int) ((PageHeader) page)->pd_upper -
943 : 65652 : (int) ((PageHeader) page)->pd_lower;
944 : :
945 [ - + ]: 65652 : if (space < (int) (ntups * sizeof(ItemIdData)))
2603 rhaas@postgresql.org 946 :UBC 0 : return 0;
2603 rhaas@postgresql.org 947 :CBC 65652 : space -= ntups * sizeof(ItemIdData);
948 : :
949 : 65652 : return (Size) space;
950 : : }
951 : :
952 : : /*
953 : : * PageGetExactFreeSpace
954 : : * Returns the size of the free (allocatable) space on a page,
955 : : * without any consideration for adding/removing line pointers.
956 : : */
957 : : Size
6262 bruce@momjian.us 958 : 1636761 : PageGetExactFreeSpace(Page page)
959 : : {
960 : : int space;
961 : :
962 : : /*
963 : : * Use signed arithmetic here so that we behave sensibly if pd_lower >
964 : : * pd_upper.
965 : : */
966 : 1636761 : space = (int) ((PageHeader) page)->pd_upper -
967 : 1636761 : (int) ((PageHeader) page)->pd_lower;
968 : :
5908 tgl@sss.pgh.pa.us 969 [ - + ]: 1636761 : if (space < 0)
5908 tgl@sss.pgh.pa.us 970 :UBC 0 : return 0;
971 : :
8125 tgl@sss.pgh.pa.us 972 :CBC 1636761 : return (Size) space;
973 : : }
974 : :
975 : :
976 : : /*
977 : : * PageGetHeapFreeSpace
978 : : * Returns the size of the free (allocatable) space on a page,
979 : : * reduced by the space needed for a new line pointer.
980 : : *
981 : : * The difference between this and PageGetFreeSpace is that this will return
982 : : * zero if there are already MaxHeapTuplesPerPage line pointers in the page
983 : : * and none are free. We use this to enforce that no more than
984 : : * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
985 : : * no more tuples than that could fit anyway, in the presence of redirected
986 : : * or dead line pointers it'd be possible to have too many line pointers.
987 : : * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
988 : : * on the number of line pointers, we make this extra check.)
989 : : */
990 : : Size
6051 991 : 12886143 : PageGetHeapFreeSpace(Page page)
992 : : {
993 : : Size space;
994 : :
995 : 12886143 : space = PageGetFreeSpace(page);
996 [ + + ]: 12886143 : if (space > 0)
997 : : {
998 : : OffsetNumber offnum,
999 : : nline;
1000 : :
1001 : : /*
1002 : : * Are there already MaxHeapTuplesPerPage line pointers in the page?
1003 : : */
1004 : 12868200 : nline = PageGetMaxOffsetNumber(page);
1005 [ + + ]: 12868200 : if (nline >= MaxHeapTuplesPerPage)
1006 : : {
643 peter@eisentraut.org 1007 [ + + ]: 4928 : if (PageHasFreeLinePointers(page))
1008 : : {
1009 : : /*
1010 : : * Since this is just a hint, we must confirm that there is
1011 : : * indeed a free line pointer
1012 : : */
5815 bruce@momjian.us 1013 [ + + ]: 412878 : for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
1014 : : {
5995 1015 : 412813 : ItemId lp = PageGetItemId(page, offnum);
1016 : :
6051 tgl@sss.pgh.pa.us 1017 [ + + ]: 412813 : if (!ItemIdIsUsed(lp))
1018 : 3607 : break;
1019 : : }
1020 : :
1021 [ + + ]: 3672 : if (offnum > nline)
1022 : : {
1023 : : /*
1024 : : * The hint is wrong, but we can't clear it here since we
1025 : : * don't have the ability to mark the page dirty.
1026 : : */
1027 : 65 : space = 0;
1028 : : }
1029 : : }
1030 : : else
1031 : : {
1032 : : /*
1033 : : * Although the hint might be wrong, PageAddItem will believe
1034 : : * it anyway, so we must believe it too.
1035 : : */
1036 : 1256 : space = 0;
1037 : : }
1038 : : }
1039 : : }
1040 : 12886143 : return space;
1041 : : }
1042 : :
1043 : :
/*
 * PageIndexTupleDelete
 *
 * This routine does the work of removing a tuple from an index page.
 *
 * Unlike heap pages, we compact out the line pointer for the removed tuple.
 */
void
PageIndexTupleDelete(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;			/* byte length of the tuple being removed */
	unsigned	offset;			/* page offset of the tuple being removed */
	int			nbytes;
	int			offidx;			/* 0-based index into pd_linp[] */
	int			nline;			/* number of line pointers on entry */

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	/* change offset number to offset index */
	offidx = offnum - 1;

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	/* the victim tuple itself must lie entirely within tuple space */
	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * First, we want to get rid of the pd_linp entry for the index tuple. We
	 * copy all subsequent linp's back one slot in the array. We don't use
	 * PageGetItemId, because we are manipulating the _array_, not individual
	 * linp's.  (memmove is required since source and destination overlap.)
	 */
	nbytes = phdr->pd_lower -
		((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);

	if (nbytes > 0)
		memmove((char *) &(phdr->pd_linp[offidx]),
				(char *) &(phdr->pd_linp[offidx + 1]),
				nbytes);

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointers */
	phdr->pd_upper += size;
	phdr->pd_lower -= sizeof(ItemIdData);

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.  (Tuples are stored from the
	 * end of the page downward, so "before" means at an offset <= the
	 * deleted tuple's offset.)
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		nline--;				/* there's one less than when we started */
		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			Assert(ItemIdHasStorage(ii));
			if (ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}
1150 : :
1151 : :
/*
 * PageIndexMultiDelete
 *
 * This routine handles the case of deleting multiple tuples from an
 * index page at once.  It is considerably faster than a loop around
 * PageIndexTupleDelete ... however, the caller *must* supply the array
 * of item numbers to be deleted in item number order!
 */
void
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		pd_lower = phdr->pd_lower;
	Offset		pd_upper = phdr->pd_upper;
	Offset		pd_special = phdr->pd_special;
	Offset		last_offset;	/* lowest kept-item offset seen so far */
	itemIdCompactData itemidbase[MaxIndexTuplesPerPage];	/* kept-item info */
	ItemIdData	newitemids[MaxIndexTuplesPerPage];	/* staged new linp array */
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nused;			/* number of items we're keeping */
	Size		totallen;		/* aligned total size of kept tuples */
	Size		size;
	unsigned	offset;
	int			nextitm;		/* index of next itemnos[] entry to match */
	OffsetNumber offnum;
	bool		presorted = true;	/* For now */

	Assert(nitems <= MaxIndexTuplesPerPage);

	/*
	 * If there aren't very many items to delete, then retail
	 * PageIndexTupleDelete is the best way.  Delete the items in reverse
	 * order so we don't have to think about adjusting item numbers for
	 * previous deletions.
	 *
	 * TODO: tune the magic number here
	 */
	if (nitems <= 2)
	{
		while (--nitems >= 0)
			PageIndexTupleDelete(page, itemnos[nitems]);
		return;
	}

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Scan the line pointer array and build a list of just the ones we are
	 * going to keep.  Notice we do not modify the page yet, since we are
	 * still validity-checking.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	totallen = 0;
	nused = 0;
	nextitm = 0;
	last_offset = pd_special;
	for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
	{
		lp = PageGetItemId(page, offnum);
		Assert(ItemIdHasStorage(lp));
		size = ItemIdGetLength(lp);
		offset = ItemIdGetOffset(lp);
		if (offset < pd_upper ||
			(offset + size) > pd_special ||
			offset != MAXALIGN(offset))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted line pointer: offset = %u, size = %u",
							offset, (unsigned int) size)));

		/* itemnos[] is in item number order, so a single cursor suffices */
		if (nextitm < nitems && offnum == itemnos[nextitm])
		{
			/* skip item to be deleted */
			nextitm++;
		}
		else
		{
			itemidptr->offsetindex = nused; /* where it will go */
			itemidptr->itemoff = offset;

			/*
			 * Track whether kept items' offsets strictly decrease, i.e.
			 * physical tuple order matches line pointer order; the result
			 * is passed to compactify_tuples.
			 */
			if (last_offset > itemidptr->itemoff)
				last_offset = itemidptr->itemoff;
			else
				presorted = false;

			itemidptr->alignedlen = MAXALIGN(size);
			totallen += itemidptr->alignedlen;
			newitemids[nused] = *lp;
			itemidptr++;
			nused++;
		}
	}

	/* this will catch invalid or out-of-order itemnos[] */
	if (nextitm != nitems)
		elog(ERROR, "incorrect index offsets supplied");

	if (totallen > (Size) (pd_special - pd_lower))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted item lengths: total %u, available space %u",
						(unsigned int) totallen, pd_special - pd_lower)));

	/*
	 * Looks good. Overwrite the line pointers with the copy, from which we've
	 * removed all the unused items.
	 */
	memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
	phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);

	/* and compactify the tuple data */
	if (nused > 0)
		compactify_tuples(itemidbase, nused, page, presorted);
	else
		phdr->pd_upper = pd_special;
}
1282 : :
1283 : :
/*
 * PageIndexTupleDeleteNoCompact
 *
 * Remove the specified tuple from an index page, but set its line pointer
 * to "unused" instead of compacting it out, except that it can be removed
 * if it's the last line pointer on the page.
 *
 * This is used for index AMs that require that existing TIDs of live tuples
 * remain unchanged, and are willing to allow unused line pointers instead.
 */
void
PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;			/* byte length of the tuple being removed */
	unsigned	offset;			/* page offset of the tuple being removed */
	int			nline;			/* number of line pointers on entry */

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	/* the victim tuple itself must lie entirely within tuple space */
	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * Either set the line pointer to "unused", or zap it if it's the last
	 * one.  (Note: it's possible that the next-to-last one(s) are already
	 * unused, but we do not trouble to try to compact them out if so.)
	 */
	if ((int) offnum < nline)
		ItemIdSetUnused(tup);
	else
	{
		phdr->pd_lower -= sizeof(ItemIdData);
		nline--;				/* there's one less than when we started */
	}

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 * (memmove is required since source and destination may overlap.)
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointer */
	phdr->pd_upper += size;

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.  Unlike PageIndexTupleDelete,
	 * some line pointers here may lack storage, so check before adjusting.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}
1384 : :
1385 : :
/*
 * PageIndexTupleOverwrite
 *
 * Replace a specified tuple on an index page.
 *
 * The new tuple is placed exactly where the old one had been, shifting
 * other tuples' data up or down as needed to keep the page compacted.
 * This is better than deleting and reinserting the tuple, because it
 * avoids any data shifting when the tuple size doesn't change; and
 * even when it does, we avoid moving the line pointers around.
 * This could be used by an index AM that doesn't want to unset the
 * LP_DEAD bit when it happens to be set.  It could conceivably also be
 * used by an index AM that cares about the physical order of tuples as
 * well as their logical/ItemId order.
 *
 * If there's insufficient space for the new tuple, return false.  Other
 * errors represent data-corruption problems, so we just elog.
 */
bool
PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
						Item newtup, Size newsize)
{
	PageHeader	phdr = (PageHeader) page;
	ItemId		tupid;			/* line pointer of the tuple being replaced */
	int			oldsize;		/* existing tuple's length (MAXALIGN'd below) */
	unsigned	offset;			/* existing tuple's page offset */
	Size		alignednewsize;
	int			size_diff;		/* old aligned size minus new aligned size */
	int			itemcount;		/* number of line pointers on the page */

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	itemcount = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > itemcount)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tupid = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tupid));
	oldsize = ItemIdGetLength(tupid);
	offset = ItemIdGetOffset(tupid);

	/* the target tuple must lie entirely within tuple space */
	if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) oldsize)));

	/*
	 * Determine actual change in space requirement, check for page overflow.
	 */
	oldsize = MAXALIGN(oldsize);
	alignednewsize = MAXALIGN(newsize);
	if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
		return false;

	/*
	 * Relocate existing data and update line pointers, unless the new tuple
	 * is the same size as the old (after alignment), in which case there's
	 * nothing to do.  Notice that what we have to relocate is data before the
	 * target tuple, not data after, so it's convenient to express size_diff
	 * as the amount by which the tuple's size is decreasing, making it the
	 * delta to add to pd_upper and affected line pointers.
	 */
	size_diff = oldsize - (int) alignednewsize;
	if (size_diff != 0)
	{
		char	   *addr = (char *) page + phdr->pd_upper;
		int			i;

		/* relocate all tuple data before the target tuple */
		memmove(addr + size_diff, addr, offset - phdr->pd_upper);

		/* adjust free space boundary pointer */
		phdr->pd_upper += size_diff;

		/* adjust affected line pointers too */
		for (i = FirstOffsetNumber; i <= itemcount; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/* Allow items without storage; currently only BRIN needs that */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size_diff;
		}
	}

	/*
	 * Update the item's tuple length without changing its lp_flags field.
	 * (The loop above already shifted this item's lp_off by size_diff; this
	 * assignment sets the final value either way.)
	 */
	tupid->lp_off = offset + size_diff;
	tupid->lp_len = newsize;

	/* Copy new tuple data onto page */
	memcpy(PageGetItem(page, tupid), newtup, newsize);

	return true;
}
1493 : :
1494 : :
1495 : : /*
1496 : : * Set checksum for a page in shared buffers.
1497 : : *
1498 : : * If checksums are disabled, or if the page is not initialized, just return
1499 : : * the input. Otherwise, we must make a copy of the page before calculating
1500 : : * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
1501 : : * from making the final checksum invalid. It doesn't matter if we include or
1502 : : * exclude hints during the copy, as long as we write a valid page and
1503 : : * associated checksum.
1504 : : *
1505 : : * Returns a pointer to the block-sized data that needs to be written. Uses
1506 : : * statically-allocated memory, so the caller must immediately write the
1507 : : * returned page and not refer to it again.
1508 : : */
1509 : : char *
4041 simon@2ndQuadrant.co 1510 : 530338 : PageSetChecksumCopy(Page page, BlockNumber blkno)
1511 : : {
1512 : : static char *pageCopy = NULL;
1513 : :
1514 : : /* If we don't need a checksum, just return the passed-in data */
2197 magnus@hagander.net 1515 [ + + + + ]: 530338 : if (PageIsNew(page) || !DataChecksumsEnabled())
4041 simon@2ndQuadrant.co 1516 : 528861 : return (char *) page;
1517 : :
1518 : : /*
1519 : : * We allocate the copy space once and use it over on each subsequent
1520 : : * call. The point of palloc'ing here, rather than having a static char
1521 : : * array, is first to ensure adequate alignment for the checksumming code
1522 : : * and second to avoid wasting space in processes that never call this.
1523 : : */
3958 tgl@sss.pgh.pa.us 1524 [ + + ]: 1477 : if (pageCopy == NULL)
372 tmunro@postgresql.or 1525 : 11 : pageCopy = MemoryContextAllocAligned(TopMemoryContext,
1526 : : BLCKSZ,
1527 : : PG_IO_ALIGN_SIZE,
1528 : : 0);
1529 : :
3958 tgl@sss.pgh.pa.us 1530 : 1477 : memcpy(pageCopy, (char *) page, BLCKSZ);
1531 : 1477 : ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
1532 : 1477 : return pageCopy;
1533 : : }
1534 : :
1535 : : /*
1536 : : * Set checksum for a page in private memory.
1537 : : *
1538 : : * This must only be used when we know that no other process can be modifying
1539 : : * the page buffer.
1540 : : */
1541 : : void
4041 simon@2ndQuadrant.co 1542 : 54370 : PageSetChecksumInplace(Page page, BlockNumber blkno)
1543 : : {
1544 : : /* If we don't need a checksum, just return */
2197 magnus@hagander.net 1545 [ + + + + ]: 54370 : if (PageIsNew(page) || !DataChecksumsEnabled())
4041 simon@2ndQuadrant.co 1546 : 54013 : return;
1547 : :
3958 tgl@sss.pgh.pa.us 1548 : 357 : ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
1549 : : }
|