/*-------------------------------------------------------------------------
 *
 * bufpage.c
 *	  POSTGRES standard buffer page code.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/page/bufpage.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/itup.h"
#include "access/xlog.h"
#include "pgstat.h"
#include "storage/checksum.h"
#include "utils/memdebug.h"
#include "utils/memutils.h"


/* GUC variable */
bool		ignore_checksum_failure = false;


/* ----------------------------------------------------------------
 *						Page support functions
 * ----------------------------------------------------------------
 */

/*
 * PageInit
 *		Initializes the contents of a page.
 *		Note that we don't calculate an initial checksum here; that's not done
 *		until it's time to write.
 */
void
PageInit(Page page, Size pageSize, Size specialSize)
{
	PageHeader	p = (PageHeader) page;

	specialSize = MAXALIGN(specialSize);

	Assert(pageSize == BLCKSZ);
	Assert(pageSize > specialSize + SizeOfPageHeaderData);

	/* Make sure all fields of page are zero, as well as unused space */
	MemSet(p, 0, pageSize);

	p->pd_flags = 0;
	p->pd_lower = SizeOfPageHeaderData;
	p->pd_upper = pageSize - specialSize;
	p->pd_special = pageSize - specialSize;
	PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
	/* p->pd_prune_xid = InvalidTransactionId;		done by above MemSet */
}
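
/*
 * Illustrative sketch, not part of bufpage.c: how a caller might obtain and
 * initialize a fresh page in local memory.  palloc(), PageInit(), and
 * PageIsEmpty() are real PostgreSQL APIs; the function name and the
 * BUFPAGE_EXAMPLES guard are invented for this sketch and keep it out of
 * any actual build.
 */
#ifdef BUFPAGE_EXAMPLES
static Page
example_fresh_page(Size specialSize)
{
	Page		page = (Page) palloc(BLCKSZ);

	/* Zero the page and set up the header, free space, and special space */
	PageInit(page, BLCKSZ, specialSize);
	Assert(PageIsEmpty(page));

	return page;
}
#endif							/* BUFPAGE_EXAMPLES */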


/*
 * PageIsVerifiedExtended
 *		Check that the page header and checksum (if any) appear valid.
 *
 * This is called when a page has just been read in from disk.  The idea is
 * to cheaply detect trashed pages before we go nuts following bogus line
 * pointers, testing invalid transaction identifiers, etc.
 *
 * It turns out to be necessary to allow zeroed pages here too.  Even though
 * this routine is *not* called when deliberately adding a page to a relation,
 * there are scenarios in which a zeroed page might be found in a table.
 * (Example: a backend extends a relation, then crashes before it can write
 * any WAL entry about the new page.  The kernel will already have the
 * zeroed page in the file, and it will stay that way after restart.)  So we
 * allow zeroed pages here, and are careful that the page access macros
 * treat such a page as empty and without free space.  Eventually, VACUUM
 * will clean up such a page and make it usable.
 *
 * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
 * a checksum failure.
 *
 * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
 * to pgstat.
 */
bool
PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
{
	PageHeader	p = (PageHeader) page;
	size_t	   *pagebytes;
	int			i;
	bool		checksum_failure = false;
	bool		header_sane = false;
	bool		all_zeroes = false;
	uint16		checksum = 0;

	/*
	 * Don't verify page data unless the page passes basic non-zero test
	 */
	if (!PageIsNew(page))
	{
		if (DataChecksumsEnabled())
		{
			checksum = pg_checksum_page((char *) page, blkno);

			if (checksum != p->pd_checksum)
				checksum_failure = true;
		}

		/*
		 * The following checks don't prove the header is correct, only that
		 * it looks sane enough to allow into the buffer pool.  Later usage of
		 * the block can still reveal problems, which is why we offer the
		 * checksum option.
		 */
		if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
			p->pd_lower <= p->pd_upper &&
			p->pd_upper <= p->pd_special &&
			p->pd_special <= BLCKSZ &&
			p->pd_special == MAXALIGN(p->pd_special))
			header_sane = true;

		if (header_sane && !checksum_failure)
			return true;
	}

	/* Check all-zeroes case */
	all_zeroes = true;
	pagebytes = (size_t *) page;
	for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
	{
		if (pagebytes[i] != 0)
		{
			all_zeroes = false;
			break;
		}
	}

	if (all_zeroes)
		return true;

	/*
	 * Throw a WARNING if the checksum fails, but only after we've checked for
	 * the all-zeroes case.
	 */
	if (checksum_failure)
	{
		if ((flags & PIV_LOG_WARNING) != 0)
			ereport(WARNING,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("page verification failed, calculated checksum %u but expected %u",
							checksum, p->pd_checksum)));

		if ((flags & PIV_REPORT_STAT) != 0)
			pgstat_report_checksum_failure();

		if (header_sane && ignore_checksum_failure)
			return true;
	}

	return false;
}
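
/*
 * Illustrative sketch, not part of bufpage.c: typical use of
 * PageIsVerifiedExtended() right after a block has been read from disk, in
 * the style of the buffer manager.  The surrounding I/O code is elided;
 * the function name and the "relname" parameter are placeholders invented
 * for this example, while PIV_LOG_WARNING and PIV_REPORT_STAT are the real
 * flags from storage/bufpage.h.
 */
#ifdef BUFPAGE_EXAMPLES
static void
example_verify_after_read(Page block, BlockNumber blocknum,
						  const char *relname)
{
	/* log a WARNING and bump pgstat counters on checksum failure */
	if (!PageIsVerifiedExtended(block, blocknum,
								PIV_LOG_WARNING | PIV_REPORT_STAT))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("invalid page in block %u of relation \"%s\"",
						blocknum, relname)));
}
#endif							/* BUFPAGE_EXAMPLES */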


/*
 * PageAddItemExtended
 *
 * Add an item to a page.  Return value is the offset at which it was
 * inserted, or InvalidOffsetNumber if the item is not inserted for any
 * reason.  A WARNING is issued indicating the reason for the refusal.
 *
 * offsetNumber must be either InvalidOffsetNumber to specify finding a
 * free line pointer, or a value between FirstOffsetNumber and one past
 * the last existing item, to specify using that particular line pointer.
 *
 * If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
 * the item at the specified offsetNumber, which must be either a
 * currently-unused line pointer, or one past the last existing item.
 *
 * If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
 * the item at the specified offsetNumber, moving existing items later
 * in the array to make room.
 *
 * If offsetNumber is not valid, then assign a slot by finding the first
 * one that is both unused and deallocated.
 *
 * If flag PAI_IS_HEAP is set, we enforce that there can't be more than
 * MaxHeapTuplesPerPage line pointers on the page.
 *
 * !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
 */
OffsetNumber
PageAddItemExtended(Page page,
					Item item,
					Size size,
					OffsetNumber offsetNumber,
					int flags)
{
	PageHeader	phdr = (PageHeader) page;
	Size		alignedSize;
	int			lower;
	int			upper;
	ItemId		itemId;
	OffsetNumber limit;
	bool		needshuffle = false;

	/*
	 * Be wary about corrupted page pointers
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ)
		ereport(PANIC,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	/*
	 * Select offsetNumber to place the new item at
	 */
	limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));

	/* was offsetNumber passed in? */
	if (OffsetNumberIsValid(offsetNumber))
	{
		/* yes, check it */
		if ((flags & PAI_OVERWRITE) != 0)
		{
			if (offsetNumber < limit)
			{
				itemId = PageGetItemId(page, offsetNumber);
				if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
				{
					elog(WARNING, "will not overwrite a used ItemId");
					return InvalidOffsetNumber;
				}
			}
		}
		else
		{
			if (offsetNumber < limit)
				needshuffle = true; /* need to move existing linp's */
		}
	}
	else
	{
		/* offsetNumber was not passed in, so find a free slot */
		/* if no free slot, we'll put it at limit (1st open slot) */
		if (PageHasFreeLinePointers(page))
		{
			/*
			 * Scan line pointer array to locate a "recyclable" (unused)
			 * ItemId.
			 *
			 * Always use earlier items first.  PageTruncateLinePointerArray
			 * can only truncate unused items when they appear as a contiguous
			 * group at the end of the line pointer array.
			 */
			for (offsetNumber = FirstOffsetNumber;
				 offsetNumber < limit;	/* limit is maxoff+1 */
				 offsetNumber++)
			{
				itemId = PageGetItemId(page, offsetNumber);

				/*
				 * We check for no storage as well, just to be paranoid;
				 * unused items should never have storage.  Assert() that the
				 * invariant is respected too.
				 */
				Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));

				if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
					break;
			}
			if (offsetNumber >= limit)
			{
				/* the hint is wrong, so reset it */
				PageClearHasFreeLinePointers(page);
			}
		}
		else
		{
			/* don't bother searching if hint says there's no free slot */
			offsetNumber = limit;
		}
	}

	/* Reject placing items beyond the first unused line pointer */
	if (offsetNumber > limit)
	{
		elog(WARNING, "specified item offset is too large");
		return InvalidOffsetNumber;
	}

	/* Reject placing items beyond heap boundary, if heap */
	if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
	{
		elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
		return InvalidOffsetNumber;
	}

	/*
	 * Compute new lower and upper pointers for page, see if it'll fit.
	 *
	 * Note: do arithmetic as signed ints, to avoid mistakes if, say,
	 * alignedSize > pd_upper.
	 */
	if (offsetNumber == limit || needshuffle)
		lower = phdr->pd_lower + sizeof(ItemIdData);
	else
		lower = phdr->pd_lower;

	alignedSize = MAXALIGN(size);

	upper = (int) phdr->pd_upper - (int) alignedSize;

	if (lower > upper)
		return InvalidOffsetNumber;

	/*
	 * OK to insert the item.  First, shuffle the existing pointers if needed.
	 */
	itemId = PageGetItemId(page, offsetNumber);

	if (needshuffle)
		memmove(itemId + 1, itemId,
				(limit - offsetNumber) * sizeof(ItemIdData));

	/* set the line pointer */
	ItemIdSetNormal(itemId, upper, size);

	/*
	 * Items normally contain no uninitialized bytes.  Core bufpage consumers
	 * conform, but this is not a necessary coding rule; a new index AM could
	 * opt to depart from it.  However, data type input functions and other
	 * C-language functions that synthesize datums should initialize all
	 * bytes; datumIsEqual() relies on this.  Testing here, along with the
	 * similar check in printtup(), helps to catch such mistakes.
	 *
	 * Values of the "name" type retrieved via index-only scans may contain
	 * uninitialized bytes; see comment in btrescan().  Valgrind will report
	 * this as an error, but it is safe to ignore.
	 */
	VALGRIND_CHECK_MEM_IS_DEFINED(item, size);

	/* copy the item's data onto the page */
	memcpy((char *) page + upper, item, size);

	/* adjust page header */
	phdr->pd_lower = (LocationIndex) lower;
	phdr->pd_upper = (LocationIndex) upper;

	return offsetNumber;
}
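
/*
 * Illustrative sketch, not part of bufpage.c: appending an item at the first
 * free slot.  Passing InvalidOffsetNumber asks the routine to choose the
 * slot; a zero flags value requests neither overwrite behavior nor the heap
 * line-pointer limit.  The function name is invented for this sketch, and
 * "item"/"itemsz" stand in for caller-supplied tuple data.
 */
#ifdef BUFPAGE_EXAMPLES
static OffsetNumber
example_append_item(Page page, Item item, Size itemsz)
{
	OffsetNumber off;

	off = PageAddItemExtended(page, item, itemsz, InvalidOffsetNumber, 0);
	if (off == InvalidOffsetNumber)
		elog(ERROR, "failed to add item to page");	/* e.g. out of space */

	return off;
}
#endif							/* BUFPAGE_EXAMPLES */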


/*
 * PageGetTempPage
 *		Get a temporary page in local memory for special processing.
 *		The returned page is not initialized at all; caller must do that.
 */
Page
PageGetTempPage(Page page)
{
	Size		pageSize;
	Page		temp;

	pageSize = PageGetPageSize(page);
	temp = (Page) palloc(pageSize);

	return temp;
}

/*
 * PageGetTempPageCopy
 *		Get a temporary page in local memory for special processing.
 *		The page is initialized by copying the contents of the given page.
 */
Page
PageGetTempPageCopy(Page page)
{
	Size		pageSize;
	Page		temp;

	pageSize = PageGetPageSize(page);
	temp = (Page) palloc(pageSize);

	memcpy(temp, page, pageSize);

	return temp;
}

/*
 * PageGetTempPageCopySpecial
 *		Get a temporary page in local memory for special processing.
 *		The page is PageInit'd with the same special-space size as the
 *		given page, and the special space is copied from the given page.
 */
Page
PageGetTempPageCopySpecial(Page page)
{
	Size		pageSize;
	Page		temp;

	pageSize = PageGetPageSize(page);
	temp = (Page) palloc(pageSize);

	PageInit(temp, pageSize, PageGetSpecialSize(page));
	memcpy(PageGetSpecialPointer(temp),
		   PageGetSpecialPointer(page),
		   PageGetSpecialSize(page));

	return temp;
}

/*
 * PageRestoreTempPage
 *		Copy temporary page back to permanent page after special processing
 *		and release the temporary page.
 */
void
PageRestoreTempPage(Page tempPage, Page oldPage)
{
	Size		pageSize;

	pageSize = PageGetPageSize(tempPage);
	memcpy((char *) oldPage, (char *) tempPage, pageSize);

	pfree(tempPage);
}
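
/*
 * Illustrative sketch, not part of bufpage.c: the usual rebuild pattern for
 * these temp-page helpers.  An index AM copies the special space to a fresh
 * local page, re-adds the items it wants to keep, then swaps the result back
 * over the original.  The function name and the "keep_items"/"keep_sizes"/
 * "nkeep" parameters are invented for this example.
 */
#ifdef BUFPAGE_EXAMPLES
static void
example_rebuild_page(Page page, Item *keep_items, Size *keep_sizes, int nkeep)
{
	Page		temp = PageGetTempPageCopySpecial(page);
	int			i;

	for (i = 0; i < nkeep; i++)
	{
		if (PageAddItemExtended(temp, keep_items[i], keep_sizes[i],
								InvalidOffsetNumber, 0) == InvalidOffsetNumber)
			elog(ERROR, "failed to add item to temp page");
	}

	/* copy the rebuilt image back over the original and free the temp page */
	PageRestoreTempPage(temp, page);
}
#endif							/* BUFPAGE_EXAMPLES */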

/*
 * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
 */
typedef struct itemIdCompactData
{
	uint16		offsetindex;	/* linp array index */
	int16		itemoff;		/* page offset of item data */
	uint16		alignedlen;		/* MAXALIGN(item data len) */
} itemIdCompactData;
typedef itemIdCompactData *itemIdCompact;

/*
 * After removing or marking some line pointers unused, move the tuples to
 * remove the gaps caused by the removed items and reorder them back into
 * reverse line pointer order in the page.
 *
 * This function can often be fairly hot, so it pays to take some measures to
 * make it as optimal as possible.
 *
 * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
 * descending order of itemoff.  When this is true we can just memmove()
 * tuples towards the end of the page.  This is quite a common case as it's
 * the order that tuples are initially inserted into pages.  When we call this
 * function to defragment the tuples in the page then any new line pointers
 * added to the page will keep that presorted order, so hitting this case is
 * still very common for tables that are commonly updated.
 *
 * When the 'itemidbase' array is not presorted then we're unable to just
 * memmove() tuples around freely.  Doing so could cause us to overwrite the
 * memory belonging to a tuple we've not moved yet.  In this case, we copy all
 * the tuples that need to be moved into a temporary buffer.  We can then
 * simply memcpy() out of that temp buffer back into the page at the correct
 * location.  Tuples are copied back into the page in the same order as the
 * 'itemidbase' array, so we end up reordering the tuples back into reverse
 * line pointer order.  This will increase the chances of hitting the
 * presorted case the next time around.
 *
 * Callers must ensure that nitems is > 0
 */
static void
compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		upper;
	Offset		copy_tail;
	Offset		copy_head;
	itemIdCompact itemidptr;
	int			i;

	/* Code within will not work correctly if nitems == 0 */
	Assert(nitems > 0);

	if (presorted)
	{

#ifdef USE_ASSERT_CHECKING
		{
			/*
			 * Verify we've not gotten any new callers that are incorrectly
			 * passing a true presorted value.
			 */
			Offset		lastoff = phdr->pd_special;

			for (i = 0; i < nitems; i++)
			{
				itemidptr = &itemidbase[i];

				Assert(lastoff > itemidptr->itemoff);

				lastoff = itemidptr->itemoff;
			}
		}
#endif							/* USE_ASSERT_CHECKING */

		/*
		 * 'itemidbase' is already in the optimal order, i.e., lower item
		 * pointers have a higher offset.  This allows us to memmove() the
		 * tuples up to the end of the page without having to worry about
		 * overwriting other tuples that have not been moved yet.
		 *
		 * There's a good chance that there are tuples already right at the
		 * end of the page that we can simply skip over because they're
		 * already in the correct location within the page.  We'll do that
		 * first...
		 */
		upper = phdr->pd_special;
		i = 0;
		do
		{
			itemidptr = &itemidbase[i];
			if (upper != itemidptr->itemoff + itemidptr->alignedlen)
				break;
			upper -= itemidptr->alignedlen;

			i++;
		} while (i < nitems);

		/*
		 * Now that we've found the first tuple that needs to be moved, we can
		 * do the tuple compactification.  We try and make the least number of
		 * memmove() calls and only call memmove() when there's a gap.  When
		 * we see a gap we just move all tuples after the gap up until the
		 * point of the last move operation.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				memmove((char *) page + upper,
						page + copy_head,
						copy_tail - copy_head);

				/*
				 * We've now moved all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple so it can be moved in another iteration of the loop.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* move the remaining tuples. */
		memmove((char *) page + upper,
				page + copy_head,
				copy_tail - copy_head);
	}
	else
	{
		PGAlignedBlock scratch;
		char	   *scratchptr = scratch.data;

		/*
		 * Non-presorted case: The tuples in the itemidbase array may be in
		 * any order.  So, in order to move these to the end of the page we
		 * must make a temp copy of each tuple that needs to be moved before
		 * we copy them back into the page at the new offset.
		 *
		 * If a large percentage of tuples have been pruned (>75%) then we'll
		 * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
		 * just do a single memcpy() for all tuples that need to be moved.
		 * When so many tuples have been removed there's likely to be a lot of
		 * gaps and it's unlikely that many non-movable tuples remain at the
		 * end of the page.
		 */
		if (nitems < PageGetMaxOffsetNumber(page) / 4)
		{
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
					   itemidptr->alignedlen);
				i++;
			} while (i < nitems);

			/* Set things up for the compactification code below */
			i = 0;
			itemidptr = &itemidbase[0];
			upper = phdr->pd_special;
		}
		else
		{
			upper = phdr->pd_special;

			/*
			 * Many tuples are likely to already be in the correct location.
			 * There's no need to copy these into the temp buffer.  Instead
			 * we'll just skip forward in the itemidbase array to the position
			 * that we do need to move tuples from so that the code below just
			 * leaves these ones alone.
			 */
			i = 0;
			do
			{
				itemidptr = &itemidbase[i];
				if (upper != itemidptr->itemoff + itemidptr->alignedlen)
					break;
				upper -= itemidptr->alignedlen;

				i++;
			} while (i < nitems);

			/* Copy all tuples that need to be moved into the temp buffer */
			memcpy(scratchptr + phdr->pd_upper,
				   page + phdr->pd_upper,
				   upper - phdr->pd_upper);
		}

		/*
		 * Do the tuple compactification.  itemidptr is already pointing to
		 * the first tuple that we're going to move.  Here we collapse the
		 * memcpy calls for adjacent tuples into a single call.  This is done
		 * by delaying the memcpy call until we find a gap that needs to be
		 * closed.
		 */
		copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
		for (; i < nitems; i++)
		{
			ItemId		lp;

			itemidptr = &itemidbase[i];
			lp = PageGetItemId(page, itemidptr->offsetindex + 1);

			/* copy pending tuples when we detect a gap */
			if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
			{
				memcpy((char *) page + upper,
					   scratchptr + copy_head,
					   copy_tail - copy_head);

				/*
				 * We've now copied all tuples already seen, but not the
				 * current tuple, so we set the copy_tail to the end of this
				 * tuple.
				 */
				copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
			}
			/* shift the target offset down by the length of this tuple */
			upper -= itemidptr->alignedlen;
			/* point the copy_head to the start of this tuple */
			copy_head = itemidptr->itemoff;

			/* update the line pointer to reference the new offset */
			lp->lp_off = upper;
		}

		/* Copy the remaining chunk */
		memcpy((char *) page + upper,
			   scratchptr + copy_head,
			   copy_tail - copy_head);
	}

	phdr->pd_upper = upper;
}

/*
 * PageRepairFragmentation
 *
 * Frees fragmented space on a heap page following pruning.
 *
 * This routine is usable for heap pages only, but see PageIndexMultiDelete.
 *
 * This routine removes unused line pointers from the end of the line pointer
 * array.  This is possible when dead heap-only tuples get removed by pruning,
 * especially when there were HOT chains with several tuples each beforehand.
 *
 * Caller had better have a full cleanup lock on page's buffer.  As a side
 * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
 * needed.  Caller might also need to account for a reduction in the length of
 * the line pointer array following array truncation.
 */
void
PageRepairFragmentation(Page page)
{
	Offset		pd_lower = ((PageHeader) page)->pd_lower;
	Offset		pd_upper = ((PageHeader) page)->pd_upper;
	Offset		pd_special = ((PageHeader) page)->pd_special;
	Offset		last_offset;
	itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nstorage,
				nunused;
	OffsetNumber finalusedlp = InvalidOffsetNumber;
	int			i;
	Size		totallen;
	bool		presorted = true;	/* For now */

	/*
	 * It's worth the trouble to be more paranoid here than in most places,
	 * because we are about to reshuffle data in (what is usually) a shared
	 * disk buffer.  If we aren't careful then corrupted pointers, lengths,
	 * etc could cause us to clobber adjacent disk buffers, spreading the data
	 * loss further.  So, check everything.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Run through the line pointer array and collect data about live items.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	nunused = totallen = 0;
	last_offset = pd_special;
	for (i = FirstOffsetNumber; i <= nline; i++)
	{
		lp = PageGetItemId(page, i);
		if (ItemIdIsUsed(lp))
		{
			if (ItemIdHasStorage(lp))
			{
				itemidptr->offsetindex = i - 1;
				itemidptr->itemoff = ItemIdGetOffset(lp);

				if (last_offset > itemidptr->itemoff)
					last_offset = itemidptr->itemoff;
				else
					presorted = false;

				if (unlikely(itemidptr->itemoff < (int) pd_upper ||
							 itemidptr->itemoff >= (int) pd_special))
					ereport(ERROR,
							(errcode(ERRCODE_DATA_CORRUPTED),
							 errmsg("corrupted line pointer: %u",
									itemidptr->itemoff)));
				itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
				totallen += itemidptr->alignedlen;
				itemidptr++;
			}

			finalusedlp = i;	/* Could be the final non-LP_UNUSED item */
		}
		else
		{
			/* Unused entries should have lp_len = 0, but make sure */
			Assert(!ItemIdHasStorage(lp));
			ItemIdSetUnused(lp);
			nunused++;
		}
	}

	nstorage = itemidptr - itemidbase;
	if (nstorage == 0)
	{
		/* Page is completely empty, so just reset it quickly */
		((PageHeader) page)->pd_upper = pd_special;
	}
	else
	{
		/* Need to compact the page the hard way */
		if (totallen > (Size) (pd_special - pd_lower))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted item lengths: total %u, available space %u",
							(unsigned int) totallen, pd_special - pd_lower)));

		compactify_tuples(itemidbase, nstorage, page, presorted);
	}

	if (finalusedlp != nline)
	{
		/* The last line pointer is not the last used line pointer */
		int			nunusedend = nline - finalusedlp;

		Assert(nunused >= nunusedend && nunusedend > 0);

		/* remove trailing unused line pointers from the count */
		nunused -= nunusedend;
		/* truncate the line pointer array */
		((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
	}

	/* Set hint bit for PageAddItemExtended */
	if (nunused > 0)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}
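
/*
 * Illustrative sketch, not part of bufpage.c: heap pruning is the typical
 * caller.  With a full cleanup lock held on the buffer, dead items are set
 * LP_UNUSED and the page is then defragmented in one pass.  The real pruning
 * logic lives in heap AM code and is elided here; the function name and the
 * "deadoffsets"/"ndead" parameters are invented for this example.
 */
#ifdef BUFPAGE_EXAMPLES
static void
example_prune_and_repair(Page page, OffsetNumber *deadoffsets, int ndead)
{
	int			i;

	for (i = 0; i < ndead; i++)
	{
		ItemId		lp = PageGetItemId(page, deadoffsets[i]);

		ItemIdSetUnused(lp);
	}

	/* reclaim the space occupied by the removed tuples */
	PageRepairFragmentation(page);
}
#endif							/* BUFPAGE_EXAMPLES */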

/*
 * PageTruncateLinePointerArray
 *
 * Removes unused line pointers at the end of the line pointer array.
 *
 * This routine is usable for heap pages only.  It is called by VACUUM during
 * its second pass over the heap.  We expect at least one LP_UNUSED line
 * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
 * it just set to LP_UNUSED then it should not call here).
 *
 * We avoid truncating the line pointer array to 0 items, if necessary by
 * leaving behind a single remaining LP_UNUSED item.  This is a little
 * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
 * page behind.
 *
 * Caller can have either an exclusive lock or a full cleanup lock on page's
 * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
 * on whether or not we leave behind any remaining LP_UNUSED items.
 */
void
PageTruncateLinePointerArray(Page page)
{
	PageHeader	phdr = (PageHeader) page;
	bool		countdone = false,
				sethint = false;
	int			nunusedend = 0;

	/* Scan line pointer array back-to-front */
	for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
	{
		ItemId		lp = PageGetItemId(page, i);

		if (!countdone && i > FirstOffsetNumber)
		{
			/*
			 * Still determining which line pointers from the end of the array
			 * will be truncated away.  Either count another line pointer as
			 * safe to truncate, or notice that it's not safe to truncate
			 * additional line pointers (stop counting line pointers).
			 */
			if (!ItemIdIsUsed(lp))
				nunusedend++;
			else
				countdone = true;
		}
		else
		{
			/*
			 * Once we've stopped counting we still need to figure out if
			 * there are any remaining LP_UNUSED line pointers somewhere more
			 * towards the front of the array.
			 */
			if (!ItemIdIsUsed(lp))
			{
				/*
				 * This is an unused line pointer that we won't be truncating
				 * away -- so there is at least one.  Set hint on page.
				 */
				sethint = true;
				break;
			}
		}
	}

	if (nunusedend > 0)
	{
		phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;

#ifdef CLOBBER_FREED_MEMORY
		memset((char *) page + phdr->pd_lower, 0x7F,
			   sizeof(ItemIdData) * nunusedend);
#endif
	}
	else
		Assert(sethint);

	/* Set hint bit for PageAddItemExtended */
	if (sethint)
		PageSetHasFreeLinePointers(page);
	else
		PageClearHasFreeLinePointers(page);
}

/*
 * PageGetFreeSpace
 *		Returns the size of the free (allocatable) space on a page,
 *		reduced by the space needed for a new line pointer.
 *
 * Note: this should usually only be used on index pages.  Use
 * PageGetHeapFreeSpace on heap pages.
 */
Size
PageGetFreeSpace(Page page)
{
	int			space;

	/*
	 * Use signed arithmetic here so that we behave sensibly if pd_lower >
	 * pd_upper.
	 */
	space = (int) ((PageHeader) page)->pd_upper -
		(int) ((PageHeader) page)->pd_lower;

	if (space < (int) sizeof(ItemIdData))
		return 0;
	space -= sizeof(ItemIdData);

	return (Size) space;
}

/*
 * PageGetFreeSpaceForMultipleTuples
 *		Returns the size of the free (allocatable) space on a page,
 *		reduced by the space needed for multiple new line pointers.
 *
 * Note: this should usually only be used on index pages.  Use
 * PageGetHeapFreeSpace on heap pages.
 */
Size
PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
{
	int			space;

	/*
	 * Use signed arithmetic here so that we behave sensibly if pd_lower >
	 * pd_upper.
	 */
	space = (int) ((PageHeader) page)->pd_upper -
		(int) ((PageHeader) page)->pd_lower;

	if (space < (int) (ntups * sizeof(ItemIdData)))
		return 0;
	space -= ntups * sizeof(ItemIdData);

	return (Size) space;
}

/*
 * PageGetExactFreeSpace
 *		Returns the size of the free (allocatable) space on a page,
 *		without any consideration for adding/removing line pointers.
 */
Size
PageGetExactFreeSpace(Page page)
{
	int			space;

	/*
	 * Use signed arithmetic here so that we behave sensibly if pd_lower >
	 * pd_upper.
	 */
	space = (int) ((PageHeader) page)->pd_upper -
		(int) ((PageHeader) page)->pd_lower;

	if (space < 0)
		return 0;

	return (Size) space;
}
974 :
975 :
976 : /*
977 : * PageGetHeapFreeSpace
978 : * Returns the size of the free (allocatable) space on a page,
979 : * reduced by the space needed for a new line pointer.
980 : *
981 : * The difference between this and PageGetFreeSpace is that this will return
982 : * zero if there are already MaxHeapTuplesPerPage line pointers in the page
983 : * and none are free. We use this to enforce that no more than
984 : * MaxHeapTuplesPerPage line pointers are created on a heap page. (Although
985 : * no more tuples than that could fit anyway, in the presence of redirected
986 : * or dead line pointers it'd be possible to have too many line pointers.
987 : * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
988 : * on the number of line pointers, we make this extra check.)
989 : */
990 : Size
5680 991 18756254 : PageGetHeapFreeSpace(Page page)
992 : {
993 : Size space;
994 :
995 18756254 : space = PageGetFreeSpace(page);
996 18756254 : if (space > 0)
997 : {
998 : OffsetNumber offnum,
999 : nline;
1000 :
1001 : /*
1002 : * Are there already MaxHeapTuplesPerPage line pointers in the page?
1003 : */
1004 18732165 : nline = PageGetMaxOffsetNumber(page);
1005 18732165 : if (nline >= MaxHeapTuplesPerPage)
1006 : {
272 peter 1007 GNC 1893 : if (PageHasFreeLinePointers(page))
1008 : {
1009 : /*
1010 : * Since this is just a hint, we must confirm that there is
1011 : * indeed a free line pointer
1012 : */
5444 bruce 1013 CBC 165286 : for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
1014 : {
5624 1015 165230 : ItemId lp = PageGetItemId(page, offnum);
1016 :
5680 tgl 1017 165230 : if (!ItemIdIsUsed(lp))
1018 711 : break;
1019 : }
1020 :
1021 767 : if (offnum > nline)
1022 : {
1023 : /*
1024 : * The hint is wrong, but we can't clear it here since we
1025 : * don't have the ability to mark the page dirty.
1026 : */
1027 56 : space = 0;
1028 : }
1029 : }
1030 : else
1031 : {
1032 : /*
1033 : * Although the hint might be wrong, PageAddItem will believe
1034 : * it anyway, so we must believe it too.
1035 : */
1036 1126 : space = 0;
1037 : }
1038 : }
1039 : }
1040 18756254 : return space;
1041 : }


/*
 * PageIndexTupleDelete
 *
 * This routine does the work of removing a tuple from an index page.
 *
 * Unlike heap pages, we compact out the line pointer for the removed tuple.
 */
void
PageIndexTupleDelete(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;
	unsigned	offset;
	int			nbytes;
	int			offidx;
	int			nline;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	/* change offset number to offset index */
	offidx = offnum - 1;

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * First, we want to get rid of the pd_linp entry for the index tuple.  We
	 * copy all subsequent linp's back one slot in the array.  We don't use
	 * PageGetItemId, because we are manipulating the _array_, not individual
	 * linp's.
	 */
	nbytes = phdr->pd_lower -
		((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);

	if (nbytes > 0)
		memmove((char *) &(phdr->pd_linp[offidx]),
				(char *) &(phdr->pd_linp[offidx + 1]),
				nbytes);

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointers */
	phdr->pd_upper += size;
	phdr->pd_lower -= sizeof(ItemIdData);

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		nline--;				/* there's one less than when we started */
		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			Assert(ItemIdHasStorage(ii));
			if (ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}


/*
 * PageIndexMultiDelete
 *
 * This routine handles the case of deleting multiple tuples from an
 * index page at once.  It is considerably faster than a loop around
 * PageIndexTupleDelete ... however, the caller *must* supply the array
 * of item numbers to be deleted in item number order!
 */
void
PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
{
	PageHeader	phdr = (PageHeader) page;
	Offset		pd_lower = phdr->pd_lower;
	Offset		pd_upper = phdr->pd_upper;
	Offset		pd_special = phdr->pd_special;
	Offset		last_offset;
	itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
	ItemIdData	newitemids[MaxIndexTuplesPerPage];
	itemIdCompact itemidptr;
	ItemId		lp;
	int			nline,
				nused;
	Size		totallen;
	Size		size;
	unsigned	offset;
	int			nextitm;
	OffsetNumber offnum;
	bool		presorted = true;	/* For now */

	Assert(nitems <= MaxIndexTuplesPerPage);

	/*
	 * If there aren't very many items to delete, then retail
	 * PageIndexTupleDelete is the best way.  Delete the items in reverse
	 * order so we don't have to think about adjusting item numbers for
	 * previous deletions.
	 *
	 * TODO: tune the magic number here
	 */
	if (nitems <= 2)
	{
		while (--nitems >= 0)
			PageIndexTupleDelete(page, itemnos[nitems]);
		return;
	}

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (pd_lower < SizeOfPageHeaderData ||
		pd_lower > pd_upper ||
		pd_upper > pd_special ||
		pd_special > BLCKSZ ||
		pd_special != MAXALIGN(pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						pd_lower, pd_upper, pd_special)));

	/*
	 * Scan the line pointer array and build a list of just the ones we are
	 * going to keep.  Notice we do not modify the page yet, since we are
	 * still validity-checking.
	 */
	nline = PageGetMaxOffsetNumber(page);
	itemidptr = itemidbase;
	totallen = 0;
	nused = 0;
	nextitm = 0;
	last_offset = pd_special;
	for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
	{
		lp = PageGetItemId(page, offnum);
		Assert(ItemIdHasStorage(lp));
		size = ItemIdGetLength(lp);
		offset = ItemIdGetOffset(lp);
		if (offset < pd_upper ||
			(offset + size) > pd_special ||
			offset != MAXALIGN(offset))
			ereport(ERROR,
					(errcode(ERRCODE_DATA_CORRUPTED),
					 errmsg("corrupted line pointer: offset = %u, size = %u",
							offset, (unsigned int) size)));

		if (nextitm < nitems && offnum == itemnos[nextitm])
		{
			/* skip item to be deleted */
			nextitm++;
		}
		else
		{
			itemidptr->offsetindex = nused; /* where it will go */
			itemidptr->itemoff = offset;

			if (last_offset > itemidptr->itemoff)
				last_offset = itemidptr->itemoff;
			else
				presorted = false;

			itemidptr->alignedlen = MAXALIGN(size);
			totallen += itemidptr->alignedlen;
			newitemids[nused] = *lp;
			itemidptr++;
			nused++;
		}
	}

	/* this will catch invalid or out-of-order itemnos[] */
	if (nextitm != nitems)
		elog(ERROR, "incorrect index offsets supplied");

	if (totallen > (Size) (pd_special - pd_lower))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted item lengths: total %u, available space %u",
						(unsigned int) totallen, pd_special - pd_lower)));

	/*
	 * Looks good.  Overwrite the line pointers with the copy, from which
	 * we've removed all the unused items.
	 */
	memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
	phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);

	/* and compactify the tuple data */
	if (nused > 0)
		compactify_tuples(itemidbase, nused, page, presorted);
	else
		phdr->pd_upper = pd_special;
}
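
/*
 * Illustrative sketch, not part of bufpage.c: the caller must supply the
 * doomed offsets in ascending item number order, as they naturally are when
 * collected during a forward scan of the line pointer array.  The function
 * name and the "deletable"/"ndeletable" parameters are invented for this
 * example.
 */
#ifdef BUFPAGE_EXAMPLES
static void
example_delete_index_items(Page page, OffsetNumber *deletable, int ndeletable)
{
	/* offsets must already be in ascending order */
	if (ndeletable > 0)
		PageIndexMultiDelete(page, deletable, ndeletable);
}
#endif							/* BUFPAGE_EXAMPLES */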


/*
 * PageIndexTupleDeleteNoCompact
 *
 * Remove the specified tuple from an index page, but set its line pointer
 * to "unused" instead of compacting it out, except that it can be removed
 * if it's the last line pointer on the page.
 *
 * This is used for index AMs that require that existing TIDs of live tuples
 * remain unchanged, and are willing to allow unused line pointers instead.
 */
void
PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
{
	PageHeader	phdr = (PageHeader) page;
	char	   *addr;
	ItemId		tup;
	Size		size;
	unsigned	offset;
	int			nline;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	nline = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > nline)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tup = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tup));
	size = ItemIdGetLength(tup);
	offset = ItemIdGetOffset(tup);

	if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) size)));

	/* Amount of space to actually be deleted */
	size = MAXALIGN(size);

	/*
	 * Either set the line pointer to "unused", or zap it if it's the last
	 * one.  (Note: it's possible that the next-to-last one(s) are already
	 * unused, but we do not trouble to try to compact them out if so.)
	 */
	if ((int) offnum < nline)
		ItemIdSetUnused(tup);
	else
	{
		phdr->pd_lower -= sizeof(ItemIdData);
		nline--;				/* there's one less than when we started */
	}

	/*
	 * Now move everything between the old upper bound (beginning of tuple
	 * space) and the beginning of the deleted tuple forward, so that space in
	 * the middle of the page is left free.  If we've just deleted the tuple
	 * at the beginning of tuple space, then there's no need to do the copy.
	 */

	/* beginning of tuple space */
	addr = (char *) page + phdr->pd_upper;

	if (offset > phdr->pd_upper)
		memmove(addr + size, addr, offset - phdr->pd_upper);

	/* adjust free space boundary pointer */
	phdr->pd_upper += size;

	/*
	 * Finally, we need to adjust the linp entries that remain.
	 *
	 * Anything that used to be before the deleted tuple's data was moved
	 * forward by the size of the deleted tuple.
	 */
	if (!PageIsEmpty(page))
	{
		int			i;

		for (i = 1; i <= nline; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size;
		}
	}
}


/*
 * PageIndexTupleOverwrite
 *
 * Replace a specified tuple on an index page.
 *
 * The new tuple is placed exactly where the old one had been, shifting
 * other tuples' data up or down as needed to keep the page compacted.
 * This is better than deleting and reinserting the tuple, because it
 * avoids any data shifting when the tuple size doesn't change; and
 * even when it does, we avoid moving the line pointers around.
 * This could be used by an index AM that doesn't want to unset the
 * LP_DEAD bit when it happens to be set.  It could conceivably also be
 * used by an index AM that cares about the physical order of tuples as
 * well as their logical/ItemId order.
 *
 * If there's insufficient space for the new tuple, return false.  Other
 * errors represent data-corruption problems, so we just elog.
 */
bool
PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
						Item newtup, Size newsize)
{
	PageHeader	phdr = (PageHeader) page;
	ItemId		tupid;
	int			oldsize;
	unsigned	offset;
	Size		alignednewsize;
	int			size_diff;
	int			itemcount;

	/*
	 * As with PageRepairFragmentation, paranoia seems justified.
	 */
	if (phdr->pd_lower < SizeOfPageHeaderData ||
		phdr->pd_lower > phdr->pd_upper ||
		phdr->pd_upper > phdr->pd_special ||
		phdr->pd_special > BLCKSZ ||
		phdr->pd_special != MAXALIGN(phdr->pd_special))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
						phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));

	itemcount = PageGetMaxOffsetNumber(page);
	if ((int) offnum <= 0 || (int) offnum > itemcount)
		elog(ERROR, "invalid index offnum: %u", offnum);

	tupid = PageGetItemId(page, offnum);
	Assert(ItemIdHasStorage(tupid));
	oldsize = ItemIdGetLength(tupid);
	offset = ItemIdGetOffset(tupid);

	if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
		offset != MAXALIGN(offset))
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("corrupted line pointer: offset = %u, size = %u",
						offset, (unsigned int) oldsize)));

	/*
	 * Determine actual change in space requirement, check for page overflow.
	 */
	oldsize = MAXALIGN(oldsize);
	alignednewsize = MAXALIGN(newsize);
	if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
		return false;

	/*
	 * Relocate existing data and update line pointers, unless the new tuple
	 * is the same size as the old (after alignment), in which case there's
	 * nothing to do.  Notice that what we have to relocate is data before the
	 * target tuple, not data after, so it's convenient to express size_diff
	 * as the amount by which the tuple's size is decreasing, making it the
	 * delta to add to pd_upper and affected line pointers.
	 */
	size_diff = oldsize - (int) alignednewsize;
	if (size_diff != 0)
	{
		char	   *addr = (char *) page + phdr->pd_upper;
		int			i;

		/* relocate all tuple data before the target tuple */
		memmove(addr + size_diff, addr, offset - phdr->pd_upper);

		/* adjust free space boundary pointer */
		phdr->pd_upper += size_diff;

		/* adjust affected line pointers too */
		for (i = FirstOffsetNumber; i <= itemcount; i++)
		{
			ItemId		ii = PageGetItemId(page, i);

			/* Allow items without storage; currently only BRIN needs that */
			if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
				ii->lp_off += size_diff;
		}
	}

	/* Update the item's tuple length without changing its lp_flags field */
	tupid->lp_off = offset + size_diff;
	tupid->lp_len = newsize;

	/* Copy new tuple data onto page */
	memcpy(PageGetItem(page, tupid), newtup, newsize);

	return true;
}
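
/*
 * Illustrative sketch, not part of bufpage.c: replacing a tuple in place.
 * The caller checks the return value because false means only "not enough
 * free space", which is a recoverable condition.  The function name and the
 * "newtup"/"newsz" parameters are placeholders invented for this example.
 */
#ifdef BUFPAGE_EXAMPLES
static bool
example_replace_tuple(Page page, OffsetNumber off, Item newtup, Size newsz)
{
	if (!PageIndexTupleOverwrite(page, off, newtup, newsz))
	{
		/* not enough room; caller might move the tuple to another page */
		return false;
	}
	return true;
}
#endif							/* BUFPAGE_EXAMPLES */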


/*
 * Set checksum for a page in shared buffers.
 *
 * If checksums are disabled, or if the page is not initialized, just return
 * the input.  Otherwise, we must make a copy of the page before calculating
 * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
 * from making the final checksum invalid.  It doesn't matter if we include or
 * exclude hints during the copy, as long as we write a valid page and
 * associated checksum.
 *
 * Returns a pointer to the block-sized data that needs to be written.  Uses
 * statically-allocated memory, so the caller must immediately write the
 * returned page and not refer to it again.
 */
char *
PageSetChecksumCopy(Page page, BlockNumber blkno)
{
	static char *pageCopy = NULL;

	/* If we don't need a checksum, just return the passed-in data */
	if (PageIsNew(page) || !DataChecksumsEnabled())
		return (char *) page;

	/*
	 * We allocate the copy space once and use it over on each subsequent
	 * call.  The point of palloc'ing here, rather than having a static char
	 * array, is first to ensure adequate alignment for the checksumming code
	 * and second to avoid wasting space in processes that never call this.
	 */
	if (pageCopy == NULL)
		pageCopy = MemoryContextAllocAligned(TopMemoryContext,
											 BLCKSZ,
											 PG_IO_ALIGN_SIZE,
											 0);

	memcpy(pageCopy, (char *) page, BLCKSZ);
	((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
	return pageCopy;
}
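
/*
 * Illustrative sketch, not part of bufpage.c: choosing between the two
 * checksum helpers.  A shared buffer that other backends may still be
 * hint-bitting must go through PageSetChecksumCopy(); a page in backend-local
 * memory can be checksummed in place.  The function name, the
 * "page_is_shared" parameter, and the commented-out "write_block" call are
 * all invented for this example.
 */
#ifdef BUFPAGE_EXAMPLES
static void
example_checksum_before_write(Page page, BlockNumber blkno,
							  bool page_is_shared)
{
	if (page_is_shared)
	{
		char	   *towrite = PageSetChecksumCopy(page, blkno);

		/* must write immediately; the static copy is reused on next call */
		/* write_block(blkno, towrite); */
		(void) towrite;
	}
	else
	{
		PageSetChecksumInplace(page, blkno);
		/* write_block(blkno, (char *) page); */
	}
}
#endif							/* BUFPAGE_EXAMPLES */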

/*
 * Set checksum for a page in private memory.
 *
 * This must only be used when we know that no other process can be modifying
 * the page buffer.
 */
void
PageSetChecksumInplace(Page page, BlockNumber blkno)
{
	/* If we don't need a checksum, just return */
	if (PageIsNew(page) || !DataChecksumsEnabled())
		return;

	((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
}