LCOV - differential code coverage report
Current view: top level - src/backend/storage/page - bufpage.c (source / functions) Coverage Total Hit UBC GIC GNC CBC ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 91.9 % 467 429 38 5 10 414 5 10
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 20 20 1 6 13 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * bufpage.c
       4                 :  *    POSTGRES standard buffer page code.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/storage/page/bufpage.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/htup_details.h"
      18                 : #include "access/itup.h"
      19                 : #include "access/xlog.h"
      20                 : #include "pgstat.h"
      21                 : #include "storage/checksum.h"
      22                 : #include "utils/memdebug.h"
      23                 : #include "utils/memutils.h"
      24                 : 
      25                 : 
      26                 : /* GUC variable */
      27                 : bool        ignore_checksum_failure = false;
      28                 : 
      29                 : 
      30                 : /* ----------------------------------------------------------------
      31                 :  *                      Page support functions
      32                 :  * ----------------------------------------------------------------
      33                 :  */
      34                 : 
      35                 : /*
      36                 :  * PageInit
      37                 :  *      Initializes the contents of a page.
      38                 :  *      Note that we don't calculate an initial checksum here; that's not done
      39                 :  *      until it's time to write.
      40                 :  */
      41                 : void
      42 CBC      603792 : PageInit(Page page, Size pageSize, Size specialSize)
      43                 : {
      44          603792 :     PageHeader  p = (PageHeader) page;
      45                 : 
      46          603792 :     specialSize = MAXALIGN(specialSize);
      47                 : 
      48          603792 :     Assert(pageSize == BLCKSZ);
      49          603792 :     Assert(pageSize > specialSize + SizeOfPageHeaderData);
      50                 : 
      51                 :     /* Make sure all fields of page are zero, as well as unused space */
      52          603792 :     MemSet(p, 0, pageSize);
      53                 : 
      54          603792 :     p->pd_flags = 0;
      55          603792 :     p->pd_lower = SizeOfPageHeaderData;
      56          603792 :     p->pd_upper = pageSize - specialSize;
      57          603792 :     p->pd_special = pageSize - specialSize;
      58          603792 :     PageSetPageSizeAndVersion(page, pageSize, PG_PAGE_LAYOUT_VERSION);
      59                 :     /* p->pd_prune_xid = InvalidTransactionId;       done by above MemSet */
      60          603792 : }
      61                 : 
      62                 : 
      63                 : /*
      64                 :  * PageIsVerifiedExtended
      65                 :  *      Check that the page header and checksum (if any) appear valid.
      66                 :  *
      67                 :  * This is called when a page has just been read in from disk.  The idea is
      68                 :  * to cheaply detect trashed pages before we go nuts following bogus line
      69                 :  * pointers, testing invalid transaction identifiers, etc.
      70                 :  *
      71                 :  * It turns out to be necessary to allow zeroed pages here too.  Even though
      72                 :  * this routine is *not* called when deliberately adding a page to a relation,
      73                 :  * there are scenarios in which a zeroed page might be found in a table.
      74                 :  * (Example: a backend extends a relation, then crashes before it can write
      75                 :  * any WAL entry about the new page.  The kernel will already have the
      76                 :  * zeroed page in the file, and it will stay that way after restart.)  So we
      77                 :  * allow zeroed pages here, and are careful that the page access macros
      78                 :  * treat such a page as empty and without free space.  Eventually, VACUUM
      79                 :  * will clean up such a page and make it usable.
      80                 :  *
      81                 :  * If flag PIV_LOG_WARNING is set, a WARNING is logged in the event of
      82                 :  * a checksum failure.
      83                 :  *
      84                 :  * If flag PIV_REPORT_STAT is set, a checksum failure is reported directly
      85                 :  * to pgstat.
      86                 :  */
      87                 : bool
      88         1310543 : PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags)
      89                 : {
      90         1310543 :     PageHeader  p = (PageHeader) page;
      91                 :     size_t     *pagebytes;
      92                 :     int         i;
      93         1310543 :     bool        checksum_failure = false;
      94         1310543 :     bool        header_sane = false;
      95         1310543 :     bool        all_zeroes = false;
      96         1310543 :     uint16      checksum = 0;
      97                 : 
      98                 :     /*
      99                 :      * Don't verify page data unless the page passes basic non-zero test
     100                 :      */
     101         1310543 :     if (!PageIsNew(page))
     102                 :     {
     103         1299221 :         if (DataChecksumsEnabled())
     104                 :         {
     105            1875 :             checksum = pg_checksum_page((char *) page, blkno);
     106                 : 
     107            1875 :             if (checksum != p->pd_checksum)
     108 UBC           0 :                 checksum_failure = true;
     109                 :         }
     110                 : 
     111                 :         /*
     112                 :          * The following checks don't prove the header is correct, only that
     113                 :          * it looks sane enough to allow into the buffer pool. Later usage of
     114                 :          * the block can still reveal problems, which is why we offer the
     115                 :          * checksum option.
     116                 :          */
     117 CBC     1299221 :         if ((p->pd_flags & ~PD_VALID_FLAG_BITS) == 0 &&
     118         1299221 :             p->pd_lower <= p->pd_upper &&
     119         1299221 :             p->pd_upper <= p->pd_special &&
     120         1299221 :             p->pd_special <= BLCKSZ &&
     121         1299221 :             p->pd_special == MAXALIGN(p->pd_special))
     122         1299221 :             header_sane = true;
     123                 : 
     124         1299221 :         if (header_sane && !checksum_failure)
     125         1299221 :             return true;
     126                 :     }
     127                 : 
     128                 :     /* Check all-zeroes case */
     129           11322 :     all_zeroes = true;
     130           11322 :     pagebytes = (size_t *) page;
     131        11605050 :     for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++)
     132                 :     {
     133        11593728 :         if (pagebytes[i] != 0)
     134                 :         {
     135 UBC           0 :             all_zeroes = false;
     136               0 :             break;
     137                 :         }
     138                 :     }
     139                 : 
     140 CBC       11322 :     if (all_zeroes)
     141           11322 :         return true;
     142                 : 
     143                 :     /*
     144                 :      * Throw a WARNING if the checksum fails, but only after we've checked for
     145                 :      * the all-zeroes case.
     146                 :      */
     147 UBC           0 :     if (checksum_failure)
     148                 :     {
     149               0 :         if ((flags & PIV_LOG_WARNING) != 0)
     150               0 :             ereport(WARNING,
     151                 :                     (errcode(ERRCODE_DATA_CORRUPTED),
     152                 :                      errmsg("page verification failed, calculated checksum %u but expected %u",
     153                 :                             checksum, p->pd_checksum)));
     154                 : 
     155               0 :         if ((flags & PIV_REPORT_STAT) != 0)
     156               0 :             pgstat_report_checksum_failure();
     157                 : 
     158               0 :         if (header_sane && ignore_checksum_failure)
     159               0 :             return true;
     160                 :     }
     161                 : 
     162               0 :     return false;
     163                 : }
     164                 : 
     165                 : 
     166                 : /*
     167                 :  *  PageAddItemExtended
     168                 :  *
     169                 :  *  Add an item to a page.  Return value is the offset at which it was
     170                 :  *  inserted, or InvalidOffsetNumber if the item is not inserted for any
     171                 :  *  reason.  A WARNING gives the reason, unless the item simply does not fit.
     172                 :  *
     173                 :  *  offsetNumber must be either InvalidOffsetNumber to specify finding a
     174                 :  *  free line pointer, or a value between FirstOffsetNumber and one past
     175                 :  *  the last existing item, to specify using that particular line pointer.
     176                 :  *
     177                 :  *  If offsetNumber is valid and flag PAI_OVERWRITE is set, we just store
     178                 :  *  the item at the specified offsetNumber, which must be either a
     179                 :  *  currently-unused line pointer, or one past the last existing item.
     180                 :  *
     181                 :  *  If offsetNumber is valid and flag PAI_OVERWRITE is not set, insert
     182                 :  *  the item at the specified offsetNumber, moving existing items later
     183                 :  *  in the array to make room.
     184                 :  *
     185                 :  *  If offsetNumber is not valid, then assign a slot by finding the first
     186                 :  *  one that is both unused and deallocated.
     187                 :  *
     188                 :  *  If flag PAI_IS_HEAP is set, we enforce that there can't be more than
     189                 :  *  MaxHeapTuplesPerPage line pointers on the page.
     190                 :  *
     191                 :  *  !!! EREPORT(ERROR) IS DISALLOWED HERE !!!
     192                 :  */
     193                 : OffsetNumber
     194 CBC    54412454 : PageAddItemExtended(Page page,
     195                 :                     Item item,
     196                 :                     Size size,
     197                 :                     OffsetNumber offsetNumber,
     198                 :                     int flags)
     199                 : {
     200        54412454 :     PageHeader  phdr = (PageHeader) page;
     201                 :     Size        alignedSize;
     202                 :     int         lower;
     203                 :     int         upper;
     204                 :     ItemId      itemId;
     205                 :     OffsetNumber limit;
     206        54412454 :     bool        needshuffle = false;
     207                 : 
     208                 :     /*
     209                 :      * Be wary about corrupted page pointers
     210                 :      */
     211        54412454 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
     212        54412454 :         phdr->pd_lower > phdr->pd_upper ||
     213        54412454 :         phdr->pd_upper > phdr->pd_special ||
     214        54412454 :         phdr->pd_special > BLCKSZ)
     215 UBC           0 :         ereport(PANIC,
     216                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
     217                 :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     218                 :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
     219                 : 
     220                 :     /*
     221                 :      * Select offsetNumber to place the new item at
     222                 :      */
     223 CBC    54412454 :     limit = OffsetNumberNext(PageGetMaxOffsetNumber(page));
     224                 : 
     225                 :     /* was offsetNumber passed in? */
     226        54412454 :     if (OffsetNumberIsValid(offsetNumber))
     227                 :     {
     228                 :         /* yes, check it */
     229        37705721 :         if ((flags & PAI_OVERWRITE) != 0)
     230                 :         {
     231         1473588 :             if (offsetNumber < limit)
     232                 :             {
     233 GNC        9463 :                 itemId = PageGetItemId(page, offsetNumber);
     234 CBC        9463 :                 if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
     235                 :                 {
     236 UBC           0 :                     elog(WARNING, "will not overwrite a used ItemId");
     237               0 :                     return InvalidOffsetNumber;
     238                 :                 }
     239                 :             }
     240                 :         }
     241                 :         else
     242                 :         {
     243 CBC    36232133 :             if (offsetNumber < limit)
     244         4770945 :                 needshuffle = true; /* need to move existing linp's */
     245                 :         }
     246                 :     }
     247                 :     else
     248                 :     {
     249                 :         /* offsetNumber was not passed in, so find a free slot */
     250                 :         /* if no free slot, we'll put it at limit (1st open slot) */
     251 GNC    16706733 :         if (PageHasFreeLinePointers(page))
     252                 :         {
     253                 :             /*
     254                 :              * Scan line pointer array to locate a "recyclable" (unused)
     255                 :              * ItemId.
     256                 :              *
     257                 :              * Always use earlier items first.  PageTruncateLinePointerArray
     258                 :              * can only truncate unused items when they appear as a contiguous
     259                 :              * group at the end of the line pointer array.
     260                 :              */
     261 CBC      188051 :             for (offsetNumber = FirstOffsetNumber;
     262        14876819 :                  offsetNumber < limit;   /* limit is maxoff+1 */
     263        14688768 :                  offsetNumber++)
     264                 :             {
     265 GNC    14865919 :                 itemId = PageGetItemId(page, offsetNumber);
     266                 : 
     267                 :                 /*
     268                 :                  * We check for no storage as well, just to be paranoid;
     269                 :                  * unused items should never have storage.  Assert() that the
     270                 :                  * invariant is respected too.
     271                 :                  */
     272 CBC    14865919 :                 Assert(ItemIdIsUsed(itemId) || !ItemIdHasStorage(itemId));
     273                 : 
     274        14865919 :                 if (!ItemIdIsUsed(itemId) && !ItemIdHasStorage(itemId))
     275          177151 :                     break;
     276                 :             }
     277          188051 :             if (offsetNumber >= limit)
     278                 :             {
     279                 :                 /* the hint is wrong, so reset it */
     280 GNC       10900 :                 PageClearHasFreeLinePointers(page);
     281                 :             }
     282                 :         }
     283                 :         else
     284                 :         {
     285                 :             /* don't bother searching if hint says there's no free slot */
     286 CBC    16518682 :             offsetNumber = limit;
     287                 :         }
     288                 :     }
     289                 : 
     290                 :     /* Reject placing items beyond the first unused line pointer */
     291        54412454 :     if (offsetNumber > limit)
     292                 :     {
     293 UBC           0 :         elog(WARNING, "specified item offset is too large");
     294               0 :         return InvalidOffsetNumber;
     295                 :     }
     296                 : 
     297                 :     /* Reject placing items beyond heap boundary, if heap */
     298 CBC    54412454 :     if ((flags & PAI_IS_HEAP) != 0 && offsetNumber > MaxHeapTuplesPerPage)
     299                 :     {
     300 UBC           0 :         elog(WARNING, "can't put more than MaxHeapTuplesPerPage items in a heap page");
     301               0 :         return InvalidOffsetNumber;
     302                 :     }
     303                 : 
     304                 :     /*
     305                 :      * Compute new lower and upper pointers for page, see if it'll fit.
     306                 :      *
     307                 :      * Note: do arithmetic as signed ints, to avoid mistakes if, say,
     308                 :      * alignedSize > pd_upper.
     309                 :      */
     310 CBC    54412454 :     if (offsetNumber == limit || needshuffle)
     311        54225840 :         lower = phdr->pd_lower + sizeof(ItemIdData);
     312                 :     else
     313          186614 :         lower = phdr->pd_lower;
     314                 : 
     315        54412454 :     alignedSize = MAXALIGN(size);
     316                 : 
     317        54412454 :     upper = (int) phdr->pd_upper - (int) alignedSize;
     318                 : 
     319        54412454 :     if (lower > upper)
     320 UBC           0 :         return InvalidOffsetNumber;
     321                 : 
     322                 :     /*
     323                 :      * OK to insert the item.  First, shuffle the existing pointers if needed.
     324                 :      */
     325 GNC    54412454 :     itemId = PageGetItemId(page, offsetNumber);
     326                 : 
     327 CBC    54412454 :     if (needshuffle)
     328         4770945 :         memmove(itemId + 1, itemId,
     329         4770945 :                 (limit - offsetNumber) * sizeof(ItemIdData));
     330                 : 
     331                 :     /* set the line pointer */
     332        54412454 :     ItemIdSetNormal(itemId, upper, size);
     333                 : 
     334                 :     /*
     335                 :      * Items normally contain no uninitialized bytes.  Core bufpage consumers
     336                 :      * conform, but this is not a necessary coding rule; a new index AM could
     337                 :      * opt to depart from it.  However, data type input functions and other
     338                 :      * C-language functions that synthesize datums should initialize all
     339                 :      * bytes; datumIsEqual() relies on this.  Testing here, along with the
     340                 :      * similar check in printtup(), helps to catch such mistakes.
     341                 :      *
     342                 :      * Values of the "name" type retrieved via index-only scans may contain
     343                 :      * uninitialized bytes; see comment in btrescan().  Valgrind will report
     344                 :      * this as an error, but it is safe to ignore.
     345                 :      */
     346                 :     VALGRIND_CHECK_MEM_IS_DEFINED(item, size);
     347                 : 
     348                 :     /* copy the item's data onto the page */
     349        54412454 :     memcpy((char *) page + upper, item, size);
     350                 : 
     351                 :     /* adjust page header */
     352        54412454 :     phdr->pd_lower = (LocationIndex) lower;
     353        54412454 :     phdr->pd_upper = (LocationIndex) upper;
     354                 : 
     355        54412454 :     return offsetNumber;
     356                 : }
     357                 : 
     358                 : 
     359                 : /*
     360                 :  * PageGetTempPage
     361                 :  *      Get a temporary page in local memory for special processing.
     362                 :  *      The returned page is not initialized at all; caller must do that.
     363                 :  */
     364                 : Page
     365           23593 : PageGetTempPage(Page page)
     366                 : {
     367                 :     Size        pageSize;
     368                 :     Page        temp;
     369                 : 
     370           23593 :     pageSize = PageGetPageSize(page);
     371           23593 :     temp = (Page) palloc(pageSize);
     372                 : 
     373           23593 :     return temp;
     374                 : }
     375                 : 
     376                 : /*
     377                 :  * PageGetTempPageCopy
     378                 :  *      Get a temporary page in local memory for special processing.
     379                 :  *      The page is initialized by copying the contents of the given page.
     380                 :  */
     381                 : Page
     382            4413 : PageGetTempPageCopy(Page page)
     383                 : {
     384                 :     Size        pageSize;
     385                 :     Page        temp;
     386                 : 
     387            4413 :     pageSize = PageGetPageSize(page);
     388            4413 :     temp = (Page) palloc(pageSize);
     389                 : 
     390            4413 :     memcpy(temp, page, pageSize);
     391                 : 
     392            4413 :     return temp;
     393                 : }
     394                 : 
     395                 : /*
     396                 :  * PageGetTempPageCopySpecial
     397                 :  *      Get a temporary page in local memory for special processing.
     398                 :  *      The page is PageInit'd with the same special-space size as the
     399                 :  *      given page, and the special space is copied from the given page.
     400                 :  */
     401                 : Page
     402           44342 : PageGetTempPageCopySpecial(Page page)
     403                 : {
     404                 :     Size        pageSize;
     405                 :     Page        temp;
     406                 : 
     407           44342 :     pageSize = PageGetPageSize(page);
     408           44342 :     temp = (Page) palloc(pageSize);
     409                 : 
     410           44342 :     PageInit(temp, pageSize, PageGetSpecialSize(page));
     411           44342 :     memcpy(PageGetSpecialPointer(temp),
     412           44342 :            PageGetSpecialPointer(page),
     413           44342 :            PageGetSpecialSize(page));
     414                 : 
     415           44342 :     return temp;
     416                 : }
     417                 : 
     418                 : /*
     419                 :  * PageRestoreTempPage
     420                 :  *      Copy temporary page back to permanent page after special processing
     421                 :  *      and release the temporary page.
     422                 :  */
     423                 : void
     424           65016 : PageRestoreTempPage(Page tempPage, Page oldPage)
     425                 : {
     426                 :     Size        pageSize;
     427                 : 
     428           65016 :     pageSize = PageGetPageSize(tempPage);
     429           65016 :     memcpy((char *) oldPage, (char *) tempPage, pageSize);
     430                 : 
     431           65016 :     pfree(tempPage);
     432           65016 : }
     433                 : 
     434                 : /*
     435                 :  * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete
     436                 :  */
     437                 : typedef struct itemIdCompactData
     438                 : {
     439                 :     uint16      offsetindex;    /* linp array index */
     440                 :     int16       itemoff;        /* page offset of item data */
     441                 :     uint16      alignedlen;     /* MAXALIGN(item data len) */
     442                 : } itemIdCompactData;
     443                 : typedef itemIdCompactData *itemIdCompact;
     444                 : 
     445                 : /*
     446                 :  * After removing or marking some line pointers unused, move the tuples to
     447                 :  * remove the gaps caused by the removed items and reorder them back into
     448                 :  * reverse line pointer order in the page.
     449                 :  *
     450                 :  * This function can often be fairly hot, so it pays to take some measures to
     451                 :  * make it as optimal as possible.
     452                 :  *
     453                 :  * Callers may pass 'presorted' as true if the 'itemidbase' array is sorted in
     454                 :  * descending order of itemoff.  When this is true we can just memmove()
     455                 :  * tuples towards the end of the page.  This is quite a common case as it's
     456                 :  * the order that tuples are initially inserted into pages.  When we call this
     457                 :  * function to defragment the tuples in the page then any new line pointers
     458                 :  * added to the page will keep that presorted order, so hitting this case is
     459                 :  * still very common for tables that are commonly updated.
     460                 :  *
     461                 :  * When the 'itemidbase' array is not presorted then we're unable to just
     462                 :  * memmove() tuples around freely.  Doing so could cause us to overwrite the
     463                 :  * memory belonging to a tuple we've not moved yet.  In this case, we copy all
     464                 :  * the tuples that need to be moved into a temporary buffer.  We can then
     465                 :  * simply memcpy() out of that temp buffer back into the page at the correct
     466                 :  * location.  Tuples are copied back into the page in the same order as the
     467                 :  * 'itemidbase' array, so we end up reordering the tuples back into reverse
     468                 :  * line pointer order.  This will increase the chances of hitting the
     469                 :  * presorted case the next time around.
     470                 :  *
     471                 :  * Callers must ensure that nitems is > 0
     472                 :  */
     473                 : static void
     474          132565 : compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted)
     475                 : {
     476          132565 :     PageHeader  phdr = (PageHeader) page;
     477                 :     Offset      upper;
     478                 :     Offset      copy_tail;
     479                 :     Offset      copy_head;
     480                 :     itemIdCompact itemidptr;
     481                 :     int         i;
     482                 : 
     483                 :     /* Code within will not work correctly if nitems == 0 */
     484          132565 :     Assert(nitems > 0);
     485                 : 
     486          132565 :     if (presorted)
     487                 :     {
     488                 : 
     489                 : #ifdef USE_ASSERT_CHECKING
     490                 :         {
     491                 :             /*
     492                 :              * Verify we've not gotten any new callers that are incorrectly
     493                 :              * passing a true presorted value.
     494                 :              */
     495          103916 :             Offset      lastoff = phdr->pd_special;
     496                 : 
     497         4127687 :             for (i = 0; i < nitems; i++)
     498                 :             {
     499         4023771 :                 itemidptr = &itemidbase[i];
     500                 : 
     501         4023771 :                 Assert(lastoff > itemidptr->itemoff);
     502                 : 
     503         4023771 :                 lastoff = itemidptr->itemoff;
     504                 :             }
     505                 :         }
     506                 : #endif                          /* USE_ASSERT_CHECKING */
     507                 : 
     508                 :         /*
     509                 :          * 'itemidbase' is already in the optimal order, i.e., lower item
     510                 :          * pointers have a higher offset.  This allows us to memmove() the
     511                 :          * tuples up to the end of the page without having to worry about
     512                 :          * overwriting other tuples that have not been moved yet.
     513                 :          *
     514                 :          * There's a good chance that there are tuples already right at the
     515                 :          * end of the page that we can simply skip over because they're
     516                 :          * already in the correct location within the page.  We'll do that
     517                 :          * first...
     518                 :          */
     519          103916 :         upper = phdr->pd_special;
     520          103916 :         i = 0;
     521                 :         do
     522                 :         {
     523         1905885 :             itemidptr = &itemidbase[i];
     524         1905885 :             if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     525           99547 :                 break;
     526         1806338 :             upper -= itemidptr->alignedlen;
     527                 : 
     528         1806338 :             i++;
     529         1806338 :         } while (i < nitems);
     530                 : 
     531                 :         /*
     532                 :          * Now that we've found the first tuple that needs to be moved, we can
     533                 :          * do the tuple compactification.  We try to make the fewest possible
     534                 :          * memmove() calls and only call memmove() when there's a gap.  When
     535                 :          * we see a gap we just move all tuples after the gap up until the
     536                 :          * point of the last move operation.
     537                 :          */
     538          103916 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     539         2321349 :         for (; i < nitems; i++)
     540                 :         {
     541                 :             ItemId      lp;
     542                 : 
     543         2217433 :             itemidptr = &itemidbase[i];
     544         2217433 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     545                 : 
     546         2217433 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     547                 :             {
     548          147550 :                 memmove((char *) page + upper,
     549          147550 :                         page + copy_head,
     550          147550 :                         copy_tail - copy_head);
     551                 : 
     552                 :                 /*
     553                 :                  * We've now moved all tuples already seen, but not the
     554                 :                  * current tuple, so we set the copy_tail to the end of this
     555                 :                  * tuple so it can be moved in another iteration of the loop.
     556                 :                  */
     557          147550 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     558                 :             }
     559                 :             /* shift the target offset down by the length of this tuple */
     560         2217433 :             upper -= itemidptr->alignedlen;
     561                 :             /* point the copy_head to the start of this tuple */
     562         2217433 :             copy_head = itemidptr->itemoff;
     563                 : 
     564                 :             /* update the line pointer to reference the new offset */
     565         2217433 :             lp->lp_off = upper;
     566                 :         }
     567                 : 
     568                 :         /* move the remaining tuples. */
     569          103916 :         memmove((char *) page + upper,
     570          103916 :                 page + copy_head,
     571          103916 :                 copy_tail - copy_head);
     572                 :     }
     573                 :     else
     574                 :     {
     575                 :         PGAlignedBlock scratch;
     576           28649 :         char       *scratchptr = scratch.data;
     577                 : 
     578                 :         /*
     579                 :          * Non-presorted case:  The tuples in the itemidbase array may be in
     580                 :          * any order.  So, in order to move these to the end of the page we
     581                 :          * must make a temp copy of each tuple that needs to be moved before
     582                 :          * we copy them back into the page at the new offset.
     583                 :          *
     584                 :          * If a large percentage of tuples have been pruned (>75%) then we'll
     585                 :          * copy these into the temp buffer tuple-by-tuple, otherwise, we'll
     586                 :          * just do a single memcpy() for all tuples that need to be moved.
     587                 :          * When so many tuples have been removed there's likely to be a lot of
     588                 :          * gaps and it's unlikely that many non-movable tuples remain at the
     589                 :          * end of the page.
     590                 :          */
     591           28649 :         if (nitems < PageGetMaxOffsetNumber(page) / 4)
     592                 :         {
     593             506 :             i = 0;
     594                 :             do
     595                 :             {
     596            9068 :                 itemidptr = &itemidbase[i];
     597            9068 :                 memcpy(scratchptr + itemidptr->itemoff, page + itemidptr->itemoff,
     598            9068 :                        itemidptr->alignedlen);
     599            9068 :                 i++;
     600            9068 :             } while (i < nitems);
     601                 : 
     602                 :             /* Set things up for the compactification code below */
     603             506 :             i = 0;
     604             506 :             itemidptr = &itemidbase[0];
     605             506 :             upper = phdr->pd_special;
     606                 :         }
     607                 :         else
     608                 :         {
     609           28143 :             upper = phdr->pd_special;
     610                 : 
     611                 :             /*
     612                 :              * Many tuples are likely to already be in the correct location.
     613                 :              * There's no need to copy these into the temp buffer.  Instead
     614                 :              * we'll just skip forward in the itemidbase array to the position
     615                 :              * that we do need to move tuples from so that the code below just
     616                 :              * leaves these ones alone.
     617                 :              */
     618           28143 :             i = 0;
     619                 :             do
     620                 :             {
     621          472797 :                 itemidptr = &itemidbase[i];
     622          472797 :                 if (upper != itemidptr->itemoff + itemidptr->alignedlen)
     623           28143 :                     break;
     624          444654 :                 upper -= itemidptr->alignedlen;
     625                 : 
     626          444654 :                 i++;
     627          444654 :             } while (i < nitems);
     628                 : 
     629                 :             /* Copy all tuples that need to be moved into the temp buffer */
     630           28143 :             memcpy(scratchptr + phdr->pd_upper,
     631           28143 :                    page + phdr->pd_upper,
     632           28143 :                    upper - phdr->pd_upper);
     633                 :         }
     634                 : 
     635                 :         /*
     636                 :          * Do the tuple compactification.  itemidptr is already pointing to
     637                 :          * the first tuple that we're going to move.  Here we collapse the
     638                 :          * memcpy calls for adjacent tuples into a single call.  This is done
     639                 :          * by delaying the memcpy call until we find a gap that needs to be
     640                 :          * closed.
     641                 :          */
     642           28649 :         copy_tail = copy_head = itemidptr->itemoff + itemidptr->alignedlen;
     643         3539136 :         for (; i < nitems; i++)
     644                 :         {
     645                 :             ItemId      lp;
     646                 : 
     647         3510487 :             itemidptr = &itemidbase[i];
     648         3510487 :             lp = PageGetItemId(page, itemidptr->offsetindex + 1);
     649                 : 
     650                 :             /* copy pending tuples when we detect a gap */
     651         3510487 :             if (copy_head != itemidptr->itemoff + itemidptr->alignedlen)
     652                 :             {
     653          658478 :                 memcpy((char *) page + upper,
     654          658478 :                        scratchptr + copy_head,
     655          658478 :                        copy_tail - copy_head);
     656                 : 
     657                 :                 /*
     658                 :                  * We've now copied all tuples already seen, but not the
     659                 :                  * current tuple, so we set the copy_tail to the end of this
     660                 :                  * tuple.
     661                 :                  */
     662          658478 :                 copy_tail = itemidptr->itemoff + itemidptr->alignedlen;
     663                 :             }
     664                 :             /* shift the target offset down by the length of this tuple */
     665         3510487 :             upper -= itemidptr->alignedlen;
     666                 :             /* point the copy_head to the start of this tuple */
     667         3510487 :             copy_head = itemidptr->itemoff;
     668                 : 
     669                 :             /* update the line pointer to reference the new offset */
     670         3510487 :             lp->lp_off = upper;
     671                 :         }
     672                 : 
     673                 :         /* Copy the remaining chunk */
     674           28649 :         memcpy((char *) page + upper,
     675           28649 :                scratchptr + copy_head,
     676           28649 :                copy_tail - copy_head);
     677                 :     }
     678                 : 
     679          132565 :     phdr->pd_upper = upper;
     680          132565 : }
     681                 : 
     682                 : /*
     683                 :  * PageRepairFragmentation
     684                 :  *
     685                 :  * Frees fragmented space on a heap page following pruning.
     686                 :  *
     687                 :  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
     688                 :  *
     689                 :  * This routine removes unused line pointers from the end of the line pointer
     690                 :  * array.  This is possible when dead heap-only tuples get removed by pruning,
     691                 :  * especially when there were HOT chains with several tuples each beforehand.
     692                 :  *
     693                 :  * Caller had better have a full cleanup lock on page's buffer.  As a side
     694                 :  * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
     695                 :  * needed.  Caller might also need to account for a reduction in the length of
     696                 :  * the line pointer array following array truncation.
     697                 :  */
     698                 : void
     699          118651 : PageRepairFragmentation(Page page)
     700                 : {
     701          118651 :     Offset      pd_lower = ((PageHeader) page)->pd_lower;
     702          118651 :     Offset      pd_upper = ((PageHeader) page)->pd_upper;
     703          118651 :     Offset      pd_special = ((PageHeader) page)->pd_special;
     704                 :     Offset      last_offset;
     705                 :     itemIdCompactData itemidbase[MaxHeapTuplesPerPage];
     706                 :     itemIdCompact itemidptr;
     707                 :     ItemId      lp;
     708                 :     int         nline,
     709                 :                 nstorage,
     710                 :                 nunused;
     711          118651 :     OffsetNumber finalusedlp = InvalidOffsetNumber;
     712                 :     int         i;
     713                 :     Size        totallen;
     714          118651 :     bool        presorted = true;   /* For now */
     715                 : 
     716                 :     /*
     717                 :      * It's worth the trouble to be more paranoid here than in most places,
     718                 :      * because we are about to reshuffle data in (what is usually) a shared
     719                 :      * disk buffer.  If we aren't careful then corrupted pointers, lengths,
     720                 :      * etc could cause us to clobber adjacent disk buffers, spreading the data
     721                 :      * loss further.  So, check everything.
     722                 :      */
     723          118651 :     if (pd_lower < SizeOfPageHeaderData ||
     724          118651 :         pd_lower > pd_upper ||
     725          118651 :         pd_upper > pd_special ||
     726          118651 :         pd_special > BLCKSZ ||
     727          118651 :         pd_special != MAXALIGN(pd_special))
     728 UBC           0 :         ereport(ERROR,
     729                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
     730                 :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
     731                 :                         pd_lower, pd_upper, pd_special)));
     732                 : 
     733                 :     /*
     734                 :      * Run through the line pointer array and collect data about live items.
     735                 :      */
     736 CBC      118651 :     nline = PageGetMaxOffsetNumber(page);
     737          118651 :     itemidptr = itemidbase;
     738          118651 :     nunused = totallen = 0;
     739          118651 :     last_offset = pd_special;
     740         7826282 :     for (i = FirstOffsetNumber; i <= nline; i++)
     741                 :     {
     742         7707631 :         lp = PageGetItemId(page, i);
     743         7707631 :         if (ItemIdIsUsed(lp))
     744                 :         {
     745         7507032 :             if (ItemIdHasStorage(lp))
     746                 :             {
     747         4443012 :                 itemidptr->offsetindex = i - 1;
     748         4443012 :                 itemidptr->itemoff = ItemIdGetOffset(lp);
     749                 : 
     750         4443012 :                 if (last_offset > itemidptr->itemoff)
     751         3946432 :                     last_offset = itemidptr->itemoff;
     752                 :                 else
     753          496580 :                     presorted = false;
     754                 : 
     755         4443012 :                 if (unlikely(itemidptr->itemoff < (int) pd_upper ||
     756                 :                              itemidptr->itemoff >= (int) pd_special))
     757 UBC           0 :                     ereport(ERROR,
     758                 :                             (errcode(ERRCODE_DATA_CORRUPTED),
     759                 :                              errmsg("corrupted line pointer: %u",
     760                 :                                     itemidptr->itemoff)));
     761 CBC     4443012 :                 itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
     762         4443012 :                 totallen += itemidptr->alignedlen;
     763         4443012 :                 itemidptr++;
     764                 :             }
     765                 : 
     766         7507032 :             finalusedlp = i;    /* Could be the final non-LP_UNUSED item */
     767                 :         }
     768                 :         else
     769                 :         {
     770                 :             /* Unused entries should have lp_len = 0, but make sure */
     771          200599 :             Assert(!ItemIdHasStorage(lp));
     772          200599 :             ItemIdSetUnused(lp);
     773          200599 :             nunused++;
     774                 :         }
     775                 :     }
     776                 : 
     777          118651 :     nstorage = itemidptr - itemidbase;
     778          118651 :     if (nstorage == 0)
     779                 :     {
     780                 :         /* Page is completely empty, so just reset it quickly */
     781            9852 :         ((PageHeader) page)->pd_upper = pd_special;
     782                 :     }
     783                 :     else
     784                 :     {
     785                 :         /* Need to compact the page the hard way */
     786          108799 :         if (totallen > (Size) (pd_special - pd_lower))
     787 UBC           0 :             ereport(ERROR,
     788                 :                     (errcode(ERRCODE_DATA_CORRUPTED),
     789                 :                      errmsg("corrupted item lengths: total %u, available space %u",
     790                 :                             (unsigned int) totallen, pd_special - pd_lower)));
     791                 : 
     792 CBC      108799 :         compactify_tuples(itemidbase, nstorage, page, presorted);
     793                 :     }
     794                 : 
     795          118651 :     if (finalusedlp != nline)
     796                 :     {
     797                 :         /* The last line pointer is not the last used line pointer */
     798            1253 :         int         nunusedend = nline - finalusedlp;
     799                 : 
     800            1253 :         Assert(nunused >= nunusedend && nunusedend > 0);
     801                 : 
     802                 :         /* remove trailing unused line pointers from the count */
     803            1253 :         nunused -= nunusedend;
     804                 :         /* truncate the line pointer array */
     805            1253 :         ((PageHeader) page)->pd_lower -= (sizeof(ItemIdData) * nunusedend);
     806                 :     }
     807                 : 
     808                 :     /* Set hint bit for PageAddItemExtended */
     809          118651 :     if (nunused > 0)
     810           21189 :         PageSetHasFreeLinePointers(page);
     811                 :     else
     812           97462 :         PageClearHasFreeLinePointers(page);
     813          118651 : }
     814                 : 
     815                 : /*
     816                 :  * PageTruncateLinePointerArray
     817                 :  *
     818                 :  * Removes unused line pointers at the end of the line pointer array.
     819                 :  *
     820                 :  * This routine is usable for heap pages only.  It is called by VACUUM during
     821                 :  * its second pass over the heap.  We expect at least one LP_UNUSED line
     822                 :  * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
     823                 :  * it just set to LP_UNUSED then it should not call here).
     824                 :  *
     825                 :  * We avoid truncating the line pointer array to 0 items, if necessary by
     826                 :  * leaving behind a single remaining LP_UNUSED item.  This is a little
     827                 :  * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
     828                 :  * page behind.
     829                 :  *
     830                 :  * Caller can have either an exclusive lock or a full cleanup lock on page's
     831                 :  * buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset based
     832                 :  * on whether or not we leave behind any remaining LP_UNUSED items.
     833                 :  */
     834                 : void
     835           26828 : PageTruncateLinePointerArray(Page page)
     836                 : {
     837           26828 :     PageHeader  phdr = (PageHeader) page;
     838           26828 :     bool        countdone = false,
     839           26828 :                 sethint = false;
     840           26828 :     int         nunusedend = 0;
     841                 : 
     842                 :     /* Scan line pointer array back-to-front */
     843         1037172 :     for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
     844                 :     {
     845         1036899 :         ItemId      lp = PageGetItemId(page, i);
     846                 : 
     847         1036899 :         if (!countdone && i > FirstOffsetNumber)
     848                 :         {
     849                 :             /*
     850                 :              * Still determining which line pointers from the end of the array
     851                 :              * will be truncated away.  Either count another line pointer as
     852                 :              * safe to truncate, or notice that it's not safe to truncate
     853                 :              * additional line pointers (stop counting line pointers).
     854                 :              */
     855          724761 :             if (!ItemIdIsUsed(lp))
     856          704129 :                 nunusedend++;
     857                 :             else
     858           20632 :                 countdone = true;
     859                 :         }
     860                 :         else
     861                 :         {
     862                 :             /*
     863                 :              * Once we've stopped counting we still need to figure out if
     864                 :              * there are any remaining LP_UNUSED line pointers somewhere more
     865                 :              * towards the front of the array.
     866                 :              */
     867          312138 :             if (!ItemIdIsUsed(lp))
     868                 :             {
     869                 :                 /*
     870                 :                  * This is an unused line pointer that we won't be truncating
     871                 :                  * away -- so there is at least one.  Set hint on page.
     872                 :                  */
     873           26555 :                 sethint = true;
     874           26555 :                 break;
     875                 :             }
     876                 :         }
     877                 :     }
     878                 : 
     879           26828 :     if (nunusedend > 0)
     880                 :     {
     881            8048 :         phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
     882                 : 
     883                 : #ifdef CLOBBER_FREED_MEMORY
     884            8048 :         memset((char *) page + phdr->pd_lower, 0x7F,
     885                 :                sizeof(ItemIdData) * nunusedend);
     886                 : #endif
     887                 :     }
     888                 :     else
     889           18780 :         Assert(sethint);
     890                 : 
     891                 :     /* Set hint bit for PageAddItemExtended */
     892           26828 :     if (sethint)
     893           26555 :         PageSetHasFreeLinePointers(page);
     894                 :     else
     895             273 :         PageClearHasFreeLinePointers(page);
     896           26828 : }
     897                 : 
     898                 : /*
     899                 :  * PageGetFreeSpace
     900                 :  *      Returns the size of the free (allocatable) space on a page,
     901                 :  *      reduced by the space needed for a new line pointer.
     902                 :  *
     903                 :  * Note: this should usually only be used on index pages.  Use
     904                 :  * PageGetHeapFreeSpace on heap pages.
     905                 :  */
     906                 : Size
     907        46099439 : PageGetFreeSpace(Page page)
     908                 : {
     909                 :     int         space;
     910                 : 
     911                 :     /*
     912                 :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     913                 :      * pd_upper.
     914                 :      */
     915        46099439 :     space = (int) ((PageHeader) page)->pd_upper -
     916        46099439 :         (int) ((PageHeader) page)->pd_lower;
     917                 : 
     918        46099439 :     if (space < (int) sizeof(ItemIdData))
     919           15056 :         return 0;
     920        46084383 :     space -= sizeof(ItemIdData);
     921                 : 
     922        46084383 :     return (Size) space;
     923                 : }
     924                 : 
     925                 : /*
     926                 :  * PageGetFreeSpaceForMultipleTuples
     927                 :  *      Returns the size of the free (allocatable) space on a page,
     928                 :  *      reduced by the space needed for multiple new line pointers.
     929                 :  *
     930                 :  * Note: this should usually only be used on index pages.  Use
     931                 :  * PageGetHeapFreeSpace on heap pages.
     932                 :  */
     933                 : Size
     934           63637 : PageGetFreeSpaceForMultipleTuples(Page page, int ntups)
     935                 : {
     936                 :     int         space;
     937                 : 
     938                 :     /*
     939                 :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     940                 :      * pd_upper.
     941                 :      */
     942           63637 :     space = (int) ((PageHeader) page)->pd_upper -
     943           63637 :         (int) ((PageHeader) page)->pd_lower;
     944                 : 
     945           63637 :     if (space < (int) (ntups * sizeof(ItemIdData)))
     946 UBC           0 :         return 0;
     947 CBC       63637 :     space -= ntups * sizeof(ItemIdData);
     948                 : 
     949           63637 :     return (Size) space;
     950                 : }
     951                 : 
     952                 : /*
     953                 :  * PageGetExactFreeSpace
     954                 :  *      Returns the size of the free (allocatable) space on a page,
     955                 :  *      without any consideration for adding/removing line pointers.
     956                 :  */
     957                 : Size
     958         1662320 : PageGetExactFreeSpace(Page page)
     959                 : {
     960                 :     int         space;
     961                 : 
     962                 :     /*
     963                 :      * Use signed arithmetic here so that we behave sensibly if pd_lower >
     964                 :      * pd_upper.
     965                 :      */
     966         1662320 :     space = (int) ((PageHeader) page)->pd_upper -
     967         1662320 :         (int) ((PageHeader) page)->pd_lower;
     968                 : 
     969         1662320 :     if (space < 0)
     970 UBC           0 :         return 0;
     971                 : 
     972 CBC     1662320 :     return (Size) space;
     973                 : }
     974                 : 
     975                 : 
     976                 : /*
     977                 :  * PageGetHeapFreeSpace
     978                 :  *      Returns the size of the free (allocatable) space on a page,
     979                 :  *      reduced by the space needed for a new line pointer.
     980                 :  *
     981                 :  * The difference between this and PageGetFreeSpace is that this will return
     982                 :  * zero if there are already MaxHeapTuplesPerPage line pointers in the page
     983                 :  * and none are free.  We use this to enforce that no more than
     984                 :  * MaxHeapTuplesPerPage line pointers are created on a heap page.  (Although
     985                 :  * no more tuples than that could fit anyway, in the presence of redirected
     986                 :  * or dead line pointers it'd be possible to have too many line pointers.
     987                 :  * To avoid breaking code that assumes MaxHeapTuplesPerPage is a hard limit
     988                 :  * on the number of line pointers, we make this extra check.)
     989                 :  */
     990                 : Size
     991        18756254 : PageGetHeapFreeSpace(Page page)
     992                 : {
     993                 :     Size        space;
     994                 : 
     995        18756254 :     space = PageGetFreeSpace(page);
     996        18756254 :     if (space > 0)
     997                 :     {
     998                 :         OffsetNumber offnum,
     999                 :                     nline;
    1000                 : 
    1001                 :         /*
    1002                 :          * Are there already MaxHeapTuplesPerPage line pointers in the page?
    1003                 :          */
    1004        18732165 :         nline = PageGetMaxOffsetNumber(page);
    1005        18732165 :         if (nline >= MaxHeapTuplesPerPage)
    1006                 :         {
    1007 GNC        1893 :             if (PageHasFreeLinePointers(page))
    1008                 :             {
    1009                 :                 /*
    1010                 :                  * Since this is just a hint, we must confirm that there is
    1011                 :                  * indeed a free line pointer
    1012                 :                  */
    1013 CBC      165286 :                 for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1014                 :                 {
    1015          165230 :                     ItemId      lp = PageGetItemId(page, offnum);
    1016                 : 
    1017          165230 :                     if (!ItemIdIsUsed(lp))
    1018             711 :                         break;
    1019                 :                 }
    1020                 : 
    1021             767 :                 if (offnum > nline)
    1022                 :                 {
    1023                 :                     /*
    1024                 :                      * The hint is wrong, but we can't clear it here since we
    1025                 :                      * don't have the ability to mark the page dirty.
    1026                 :                      */
    1027              56 :                     space = 0;
    1028                 :                 }
    1029                 :             }
    1030                 :             else
    1031                 :             {
    1032                 :                 /*
    1033                 :                  * Although the hint might be wrong, PageAddItem will believe
    1034                 :                  * it anyway, so we must believe it too.
    1035                 :                  */
    1036            1126 :                 space = 0;
    1037                 :             }
    1038                 :         }
    1039                 :     }
    1040        18756254 :     return space;
    1041                 : }
    1042                 : 
    1043                 : 
    1044                 : /*
    1045                 :  * PageIndexTupleDelete
    1046                 :  *
    1047                 :  * This routine does the work of removing a tuple from an index page.
    1048                 :  *
    1049                 :  * Unlike heap pages, we compact out the line pointer for the removed tuple.
                         :  *
                         :  * 'page' is modified in place.  'offnum' is the offset number (counting
                         :  * from 1) of the item to remove; it must not exceed the page's current
                         :  * maximum offset number.
                         :  *
                         :  * The line pointer array is closed up over the removed entry, the tuple
                         :  * data stored below the removed tuple is moved toward the end of the page
                         :  * by the removed tuple's MAXALIGN'd length, pd_lower and pd_upper are
                         :  * adjusted to match, and the lp_off of every remaining item located at or
                         :  * below the removed tuple's old offset is increased by the same amount.
                         :  *
                         :  * Raises ERROR if the page header pointers or the target line pointer look
                         :  * corrupted, or if offnum is out of range.  All of these checks are made
                         :  * before the page is modified.
    1050                 :  */
    1051                 : void
    1052          413620 : PageIndexTupleDelete(Page page, OffsetNumber offnum)
    1053                 : {
    1054          413620 :     PageHeader  phdr = (PageHeader) page;
    1055                 :     char       *addr;
    1056                 :     ItemId      tup;
    1057                 :     Size        size;
    1058                 :     unsigned    offset;
    1059                 :     int         nbytes;
    1060                 :     int         offidx;
    1061                 :     int         nline;
    1062                 : 
    1063                 :     /*
    1064                 :      * As with PageRepairFragmentation, paranoia seems justified.
    1065                 :      */
    1066          413620 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1067          413620 :         phdr->pd_lower > phdr->pd_upper ||
    1068          413620 :         phdr->pd_upper > phdr->pd_special ||
    1069          413620 :         phdr->pd_special > BLCKSZ ||
    1070          413620 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1071 UBC           0 :         ereport(ERROR,
    1072                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1073                 :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1074                 :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1075                 : 
    1076 CBC      413620 :     nline = PageGetMaxOffsetNumber(page);
    1077          413620 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1078 UBC           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1079                 : 
    1080                 :     /* change offset number to offset index */
    1081 CBC      413620 :     offidx = offnum - 1;
    1082                 : 
    1083          413620 :     tup = PageGetItemId(page, offnum);
    1084          413620 :     Assert(ItemIdHasStorage(tup));
    1085          413620 :     size = ItemIdGetLength(tup);
    1086          413620 :     offset = ItemIdGetOffset(tup);
    1087                 : 
    1088          413620 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1089          413620 :         offset != MAXALIGN(offset))
    1090 UBC           0 :         ereport(ERROR,
    1091                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1092                 :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1093                 :                         offset, (unsigned int) size)));
    1094                 : 
    1095                 :     /* Amount of space to actually be deleted */
    1096 CBC      413620 :     size = MAXALIGN(size);
    1097                 : 
    1098                 :     /*
    1099                 :      * First, we want to get rid of the pd_linp entry for the index tuple. We
    1100                 :      * copy all subsequent linp's back one slot in the array. We don't use
    1101                 :      * PageGetItemId, because we are manipulating the _array_, not individual
    1102                 :      * linp's.
                         :      *
                         :      * nbytes is the number of bytes of line pointers that follow the removed
                         :      * entry (measured up to pd_lower); when the removed entry is the last
                         :      * one there is nothing to copy.
    1103                 :      */
    1104          413620 :     nbytes = phdr->pd_lower -
    1105          413620 :         ((char *) &phdr->pd_linp[offidx + 1] - (char *) phdr);
    1106                 : 
    1107          413620 :     if (nbytes > 0)
    1108          404270 :         memmove((char *) &(phdr->pd_linp[offidx]),
    1109          404270 :                 (char *) &(phdr->pd_linp[offidx + 1]),
    1110                 :                 nbytes);
    1111                 : 
    1112                 :     /*
    1113                 :      * Now move everything between the old upper bound (beginning of tuple
    1114                 :      * space) and the beginning of the deleted tuple forward, so that space in
    1115                 :      * the middle of the page is left free.  If we've just deleted the tuple
    1116                 :      * at the beginning of tuple space, then there's no need to do the copy.
    1117                 :      */
    1118                 : 
    1119                 :     /* beginning of tuple space */
    1120          413620 :     addr = (char *) page + phdr->pd_upper;
    1121                 : 
    1122          413620 :     if (offset > phdr->pd_upper)
    1123          403438 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1124                 : 
    1125                 :     /* adjust free space boundary pointers */
    1126          413620 :     phdr->pd_upper += size;
    1127          413620 :     phdr->pd_lower -= sizeof(ItemIdData);
    1128                 : 
    1129                 :     /*
    1130                 :      * Finally, we need to adjust the linp entries that remain.
    1131                 :      *
    1132                 :      * Anything that used to be before the deleted tuple's data was moved
    1133                 :      * forward by the size of the deleted tuple.
    1134                 :      */
    1135          413620 :     if (!PageIsEmpty(page))
    1136                 :     {
    1137                 :         int         i;
    1138                 : 
    1139          412822 :         nline--;                /* there's one less than when we started */
    1140        72556087 :         for (i = 1; i <= nline; i++)
    1141                 :         {
    1142 GNC    72143265 :             ItemId      ii = PageGetItemId(page, i);
    1143                 : 
    1144 CBC    72143265 :             Assert(ItemIdHasStorage(ii));
    1145        72143265 :             if (ItemIdGetOffset(ii) <= offset)
    1146        47227724 :                 ii->lp_off += size;
    1147                 :         }
    1148                 :     }
    1149          413620 : }
    1150                 : 
    1151                 : 
    1152                 : /*
    1153                 :  * PageIndexMultiDelete
    1154                 :  *
    1155                 :  * This routine handles the case of deleting multiple tuples from an
    1156                 :  * index page at once.  It is considerably faster than a loop around
    1157                 :  * PageIndexTupleDelete ... however, the caller *must* supply the array
    1158                 :  * of item numbers to be deleted in item number order!
                         :  *
                         :  * 'itemnos' holds 'nitems' offset numbers of items on 'page'; nitems must
                         :  * not exceed MaxIndexTuplesPerPage (checked only by an Assert).
                         :  *
                         :  * With two or fewer items the work is simply handed to
                         :  * PageIndexTupleDelete.  Otherwise the surviving line pointers are
                         :  * collected into a local copy while every line pointer on the page is
                         :  * sanity-checked; only after all checks pass is the page's line pointer
                         :  * array overwritten, pd_lower recomputed, and the tuple data compacted
                         :  * via compactify_tuples (or pd_upper simply reset to pd_special when no
                         :  * items survive).
                         :  *
                         :  * Raises ERROR on corrupted page header pointers, a corrupted line
                         :  * pointer, an itemnos[] array that could not be fully matched against the
                         :  * page in a single ascending scan, or surviving item lengths that total
                         :  * more than the space between pd_lower and pd_special.
    1159                 :  */
    1160                 : void
    1161           29721 : PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems)
    1162                 : {
    1163           29721 :     PageHeader  phdr = (PageHeader) page;
    1164           29721 :     Offset      pd_lower = phdr->pd_lower;
    1165           29721 :     Offset      pd_upper = phdr->pd_upper;
    1166           29721 :     Offset      pd_special = phdr->pd_special;
    1167                 :     Offset      last_offset;
    1168                 :     itemIdCompactData itemidbase[MaxIndexTuplesPerPage];
    1169                 :     ItemIdData  newitemids[MaxIndexTuplesPerPage];
    1170                 :     itemIdCompact itemidptr;
    1171                 :     ItemId      lp;
    1172                 :     int         nline,
    1173                 :                 nused;
    1174                 :     Size        totallen;
    1175                 :     Size        size;
    1176                 :     unsigned    offset;
    1177                 :     int         nextitm;
    1178                 :     OffsetNumber offnum;
    1179           29721 :     bool        presorted = true;   /* For now */
    1180                 : 
    1181           29721 :     Assert(nitems <= MaxIndexTuplesPerPage);
    1182                 : 
    1183                 :     /*
    1184                 :      * If there aren't very many items to delete, then retail
    1185                 :      * PageIndexTupleDelete is the best way.  Delete the items in reverse
    1186                 :      * order so we don't have to think about adjusting item numbers for
    1187                 :      * previous deletions.
    1188                 :      *
    1189                 :      * TODO: tune the magic number here
    1190                 :      */
    1191           29721 :     if (nitems <= 2)
    1192                 :     {
    1193           14213 :         while (--nitems >= 0)
    1194            8381 :             PageIndexTupleDelete(page, itemnos[nitems]);
    1195            5832 :         return;
    1196                 :     }
    1197                 : 
    1198                 :     /*
    1199                 :      * As with PageRepairFragmentation, paranoia seems justified.
    1200                 :      */
    1201           23889 :     if (pd_lower < SizeOfPageHeaderData ||
    1202           23889 :         pd_lower > pd_upper ||
    1203           23889 :         pd_upper > pd_special ||
    1204           23889 :         pd_special > BLCKSZ ||
    1205           23889 :         pd_special != MAXALIGN(pd_special))
    1206 UBC           0 :         ereport(ERROR,
    1207                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1208                 :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1209                 :                         pd_lower, pd_upper, pd_special)));
    1210                 : 
    1211                 :     /*
    1212                 :      * Scan the line pointer array and build a list of just the ones we are
    1213                 :      * going to keep.  Notice we do not modify the page yet, since we are
    1214                 :      * still validity-checking.
                         :      *
                         :      * While scanning we also track, via last_offset (which starts out at
                         :      * pd_special), whether each kept item's offset is strictly less than
                         :      * that of the previously kept item.  If any is not, presorted is
                         :      * cleared; the flag is later handed to compactify_tuples.
    1215                 :      */
    1216 CBC       23889 :     nline = PageGetMaxOffsetNumber(page);
    1217           23889 :     itemidptr = itemidbase;
    1218           23889 :     totallen = 0;
    1219           23889 :     nused = 0;
    1220           23889 :     nextitm = 0;
    1221           23889 :     last_offset = pd_special;
    1222         5303087 :     for (offnum = FirstOffsetNumber; offnum <= nline; offnum = OffsetNumberNext(offnum))
    1223                 :     {
    1224         5279198 :         lp = PageGetItemId(page, offnum);
    1225         5279198 :         Assert(ItemIdHasStorage(lp));
    1226         5279198 :         size = ItemIdGetLength(lp);
    1227         5279198 :         offset = ItemIdGetOffset(lp);
    1228         5279198 :         if (offset < pd_upper ||
    1229         5279198 :             (offset + size) > pd_special ||
    1230         5279198 :             offset != MAXALIGN(offset))
    1231 UBC           0 :             ereport(ERROR,
    1232                 :                     (errcode(ERRCODE_DATA_CORRUPTED),
    1233                 :                      errmsg("corrupted line pointer: offset = %u, size = %u",
    1234                 :                             offset, (unsigned int) size)));
    1235                 : 
    1236 CBC     5279198 :         if (nextitm < nitems && offnum == itemnos[nextitm])
    1237                 :         {
    1238                 :             /* skip item to be deleted */
    1239         1743298 :             nextitm++;
    1240                 :         }
    1241                 :         else
    1242                 :         {
    1243         3535900 :             itemidptr->offsetindex = nused; /* where it will go */
    1244         3535900 :             itemidptr->itemoff = offset;
    1245                 : 
    1246         3535900 :             if (last_offset > itemidptr->itemoff)
    1247         1040041 :                 last_offset = itemidptr->itemoff;
    1248                 :             else
    1249         2495859 :                 presorted = false;
    1250                 : 
    1251         3535900 :             itemidptr->alignedlen = MAXALIGN(size);
    1252         3535900 :             totallen += itemidptr->alignedlen;
    1253         3535900 :             newitemids[nused] = *lp;
    1254         3535900 :             itemidptr++;
    1255         3535900 :             nused++;
    1256                 :         }
    1257                 :     }
    1258                 : 
    1259                 :     /* this will catch invalid or out-of-order itemnos[] */
    1260           23889 :     if (nextitm != nitems)
    1261 UBC           0 :         elog(ERROR, "incorrect index offsets supplied");
    1262                 : 
    1263 CBC       23889 :     if (totallen > (Size) (pd_special - pd_lower))
    1264 UBC           0 :         ereport(ERROR,
    1265                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1266                 :                  errmsg("corrupted item lengths: total %u, available space %u",
    1267                 :                         (unsigned int) totallen, pd_special - pd_lower)));
    1268                 : 
    1269                 :     /*
    1270                 :      * Looks good. Overwrite the line pointers with the copy, from which we've
    1271                 :      * removed all the unused items.
    1272                 :      */
    1273 CBC       23889 :     memcpy(phdr->pd_linp, newitemids, nused * sizeof(ItemIdData));
    1274           23889 :     phdr->pd_lower = SizeOfPageHeaderData + nused * sizeof(ItemIdData);
    1275                 : 
    1276                 :     /* and compactify the tuple data */
    1277           23889 :     if (nused > 0)
    1278           23766 :         compactify_tuples(itemidbase, nused, page, presorted);
    1279                 :     else
    1280             123 :         phdr->pd_upper = pd_special;
    1281                 : }
    1282                 : 
    1283                 : 
    1284                 : /*
    1285                 :  * PageIndexTupleDeleteNoCompact
    1286                 :  *
    1287                 :  * Remove the specified tuple from an index page, but set its line pointer
    1288                 :  * to "unused" instead of compacting it out, except that it can be removed
    1289                 :  * if it's the last line pointer on the page.
    1290                 :  *
    1291                 :  * This is used for index AMs that require that existing TIDs of live tuples
    1292                 :  * remain unchanged, and are willing to allow unused line pointers instead.
                         :  *
                         :  * 'page' is modified in place.  'offnum' is the offset number (counting
                         :  * from 1) of the item to remove; it must not exceed the page's current
                         :  * maximum offset number.
                         :  *
                         :  * The tuple's storage is always reclaimed: data stored below it is moved
                         :  * toward the end of the page by the tuple's MAXALIGN'd length, pd_upper is
                         :  * advanced by that amount, and the lp_off of every remaining item that has
                         :  * storage and was located at or below the removed tuple's old offset is
                         :  * increased to match.  pd_lower changes only when the removed line
                         :  * pointer was the last one.
                         :  *
                         :  * Raises ERROR if the page header pointers or the target line pointer look
                         :  * corrupted, or if offnum is out of range.  All of these checks are made
                         :  * before the page is modified.
    1293                 :  */
    1294                 : void
    1295             339 : PageIndexTupleDeleteNoCompact(Page page, OffsetNumber offnum)
    1296                 : {
    1297             339 :     PageHeader  phdr = (PageHeader) page;
    1298                 :     char       *addr;
    1299                 :     ItemId      tup;
    1300                 :     Size        size;
    1301                 :     unsigned    offset;
    1302                 :     int         nline;
    1303                 : 
    1304                 :     /*
    1305                 :      * As with PageRepairFragmentation, paranoia seems justified.
    1306                 :      */
    1307             339 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1308             339 :         phdr->pd_lower > phdr->pd_upper ||
    1309             339 :         phdr->pd_upper > phdr->pd_special ||
    1310             339 :         phdr->pd_special > BLCKSZ ||
    1311             339 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1312 UBC           0 :         ereport(ERROR,
    1313                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1314                 :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1315                 :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1316                 : 
    1317 CBC         339 :     nline = PageGetMaxOffsetNumber(page);
    1318             339 :     if ((int) offnum <= 0 || (int) offnum > nline)
    1319 UBC           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1320                 : 
    1321 CBC         339 :     tup = PageGetItemId(page, offnum);
    1322             339 :     Assert(ItemIdHasStorage(tup));
    1323             339 :     size = ItemIdGetLength(tup);
    1324             339 :     offset = ItemIdGetOffset(tup);
    1325                 : 
    1326             339 :     if (offset < phdr->pd_upper || (offset + size) > phdr->pd_special ||
    1327             339 :         offset != MAXALIGN(offset))
    1328 UBC           0 :         ereport(ERROR,
    1329                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1330                 :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1331                 :                         offset, (unsigned int) size)));
    1332                 : 
    1333                 :     /* Amount of space to actually be deleted */
    1334 CBC         339 :     size = MAXALIGN(size);
    1335                 : 
    1336                 :     /*
    1337                 :      * Either set the line pointer to "unused", or zap it if it's the last
    1338                 :      * one.  (Note: it's possible that the next-to-last one(s) are already
    1339                 :      * unused, but we do not trouble to try to compact them out if so.)
    1340                 :      */
    1341             339 :     if ((int) offnum < nline)
    1342             306 :         ItemIdSetUnused(tup);
    1343                 :     else
    1344                 :     {
    1345              33 :         phdr->pd_lower -= sizeof(ItemIdData);
    1346              33 :         nline--;                /* there's one less than when we started */
    1347                 :     }
    1348                 : 
    1349                 :     /*
    1350                 :      * Now move everything between the old upper bound (beginning of tuple
    1351                 :      * space) and the beginning of the deleted tuple forward, so that space in
    1352                 :      * the middle of the page is left free.  If we've just deleted the tuple
    1353                 :      * at the beginning of tuple space, then there's no need to do the copy.
    1354                 :      */
    1355                 : 
    1356                 :     /* beginning of tuple space */
    1357             339 :     addr = (char *) page + phdr->pd_upper;
    1358                 : 
    1359             339 :     if (offset > phdr->pd_upper)
    1360             306 :         memmove(addr + size, addr, offset - phdr->pd_upper);
    1361                 : 
    1362                 :     /* adjust free space boundary pointer */
    1363             339 :     phdr->pd_upper += size;
    1364                 : 
    1365                 :     /*
    1366                 :      * Finally, we need to adjust the linp entries that remain.
    1367                 :      *
    1368                 :      * Anything that used to be before the deleted tuple's data was moved
    1369                 :      * forward by the size of the deleted tuple.
                         :      *
                         :      * Line pointers without storage are skipped here rather than asserted
                         :      * against, since this routine itself leaves such entries on the page.
    1370                 :      */
    1371             339 :     if (!PageIsEmpty(page))
    1372                 :     {
    1373                 :         int         i;
    1374                 : 
    1375           86527 :         for (i = 1; i <= nline; i++)
    1376                 :         {
    1377 GNC       86192 :             ItemId      ii = PageGetItemId(page, i);
    1378                 : 
    1379 CBC       86192 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1380           42295 :                 ii->lp_off += size;
    1381                 :         }
    1382                 :     }
    1383             339 : }
    1384                 : 
    1385                 : 
    1386                 : /*
    1387                 :  * PageIndexTupleOverwrite
    1388                 :  *
    1389                 :  * Replace a specified tuple on an index page.
    1390                 :  *
    1391                 :  * The new tuple is placed exactly where the old one had been, shifting
    1392                 :  * other tuples' data up or down as needed to keep the page compacted.
    1393                 :  * This is better than deleting and reinserting the tuple, because it
    1394                 :  * avoids any data shifting when the tuple size doesn't change; and
    1395                 :  * even when it does, we avoid moving the line pointers around.
    1396                 :  * This could be used by an index AM that doesn't want to unset the
    1397                 :  * LP_DEAD bit when it happens to be set.  It could conceivably also be
    1398                 :  * used by an index AM that cares about the physical order of tuples as
    1399                 :  * well as their logical/ItemId order.
    1400                 :  *
    1401                 :  * If there's insufficient space for the new tuple, return false.  Other
    1402                 :  * errors represent data-corruption problems, so we just elog.
                         :  *
                         :  * 'offnum' is the offset number (counting from 1) of the item to replace;
                         :  * 'newtup' points to 'newsize' bytes of tuple data, which are copied onto
                         :  * the page.
                         :  *
                         :  * Returns true on success.  Returns false, without having modified the
                         :  * page, when the MAXALIGN'd new size exceeds the MAXALIGN'd old size plus
                         :  * the free space between pd_lower and pd_upper.
    1403                 :  */
    1404                 : bool
    1405          515191 : PageIndexTupleOverwrite(Page page, OffsetNumber offnum,
    1406                 :                         Item newtup, Size newsize)
    1407                 : {
    1408          515191 :     PageHeader  phdr = (PageHeader) page;
    1409                 :     ItemId      tupid;
    1410                 :     int         oldsize;
    1411                 :     unsigned    offset;
    1412                 :     Size        alignednewsize;
    1413                 :     int         size_diff;
    1414                 :     int         itemcount;
    1415                 : 
    1416                 :     /*
    1417                 :      * As with PageRepairFragmentation, paranoia seems justified.
    1418                 :      */
    1419          515191 :     if (phdr->pd_lower < SizeOfPageHeaderData ||
    1420          515191 :         phdr->pd_lower > phdr->pd_upper ||
    1421          515191 :         phdr->pd_upper > phdr->pd_special ||
    1422          515191 :         phdr->pd_special > BLCKSZ ||
    1423          515191 :         phdr->pd_special != MAXALIGN(phdr->pd_special))
    1424 UBC           0 :         ereport(ERROR,
    1425                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1426                 :                  errmsg("corrupted page pointers: lower = %u, upper = %u, special = %u",
    1427                 :                         phdr->pd_lower, phdr->pd_upper, phdr->pd_special)));
    1428                 : 
    1429 CBC      515191 :     itemcount = PageGetMaxOffsetNumber(page);
    1430          515191 :     if ((int) offnum <= 0 || (int) offnum > itemcount)
    1431 UBC           0 :         elog(ERROR, "invalid index offnum: %u", offnum);
    1432                 : 
    1433 CBC      515191 :     tupid = PageGetItemId(page, offnum);
    1434          515191 :     Assert(ItemIdHasStorage(tupid));
    1435          515191 :     oldsize = ItemIdGetLength(tupid);
    1436          515191 :     offset = ItemIdGetOffset(tupid);
    1437                 : 
    1438          515191 :     if (offset < phdr->pd_upper || (offset + oldsize) > phdr->pd_special ||
    1439          515191 :         offset != MAXALIGN(offset))
    1440 UBC           0 :         ereport(ERROR,
    1441                 :                 (errcode(ERRCODE_DATA_CORRUPTED),
    1442                 :                  errmsg("corrupted line pointer: offset = %u, size = %u",
    1443                 :                         offset, (unsigned int) oldsize)));
    1444                 : 
    1445                 :     /*
    1446                 :      * Determine actual change in space requirement, check for page overflow.
    1447                 :      */
    1448 CBC      515191 :     oldsize = MAXALIGN(oldsize);
    1449          515191 :     alignednewsize = MAXALIGN(newsize);
    1450          515191 :     if (alignednewsize > oldsize + (phdr->pd_upper - phdr->pd_lower))
    1451 UBC           0 :         return false;
    1452                 : 
    1453                 :     /*
    1454                 :      * Relocate existing data and update line pointers, unless the new tuple
    1455                 :      * is the same size as the old (after alignment), in which case there's
    1456                 :      * nothing to do.  Notice that what we have to relocate is data before the
    1457                 :      * target tuple, not data after, so it's convenient to express size_diff
    1458                 :      * as the amount by which the tuple's size is decreasing, making it the
    1459                 :      * delta to add to pd_upper and affected line pointers.
                         :      *
                         :      * size_diff is negative when the tuple grows; the same additions then
                         :      * move the data, pd_upper and the affected offsets toward the start of
                         :      * the page.  The loop's "<= offset" test also matches the target item
                         :      * itself, so its lp_off is shifted there too; the assignment after the
                         :      * loop stores that same value (and is a no-op change when size_diff is
                         :      * zero).
    1460                 :      */
    1461 CBC      515191 :     size_diff = oldsize - (int) alignednewsize;
    1462          515191 :     if (size_diff != 0)
    1463                 :     {
    1464          113112 :         char       *addr = (char *) page + phdr->pd_upper;
    1465                 :         int         i;
    1466                 : 
    1467                 :         /* relocate all tuple data before the target tuple */
    1468          113112 :         memmove(addr + size_diff, addr, offset - phdr->pd_upper);
    1469                 : 
    1470                 :         /* adjust free space boundary pointer */
    1471          113112 :         phdr->pd_upper += size_diff;
    1472                 : 
    1473                 :         /* adjust affected line pointers too */
    1474        23036090 :         for (i = FirstOffsetNumber; i <= itemcount; i++)
    1475                 :         {
    1476 GNC    22922978 :             ItemId      ii = PageGetItemId(page, i);
    1477                 : 
    1478                 :             /* Allow items without storage; currently only BRIN needs that */
    1479 CBC    22922978 :             if (ItemIdHasStorage(ii) && ItemIdGetOffset(ii) <= offset)
    1480        11315636 :                 ii->lp_off += size_diff;
    1481                 :         }
    1482                 :     }
    1483                 : 
    1484                 :     /* Update the item's tuple length without changing its lp_flags field */
    1485          515191 :     tupid->lp_off = offset + size_diff;
    1486          515191 :     tupid->lp_len = newsize;
    1487                 : 
    1488                 :     /* Copy new tuple data onto page */
    1489          515191 :     memcpy(PageGetItem(page, tupid), newtup, newsize);
    1490                 : 
    1491          515191 :     return true;
    1492                 : }
    1493                 : 
    1494                 : 
    1495                 : /*
    1496                 :  * Set checksum for a page in shared buffers.
    1497                 :  *
    1498                 :  * If checksums are disabled, or if the page is not initialized, just return
    1499                 :  * the input.  Otherwise, we must make a copy of the page before calculating
    1500                 :  * the checksum, to prevent concurrent modifications (e.g. setting hint bits)
    1501                 :  * from making the final checksum invalid.  It doesn't matter if we include or
    1502                 :  * exclude hints during the copy, as long as we write a valid page and
    1503                 :  * associated checksum.
    1504                 :  *
    1505                 :  * Returns a pointer to the block-sized data that needs to be written. Uses
    1506                 :  * statically-allocated memory, so the caller must immediately write the
    1507                 :  * returned page and not refer to it again.
                         :  *
                         :  * 'blkno' is passed to pg_checksum_page together with the copy; the
                         :  * resulting checksum is stored in the copy's pd_checksum field, so 'page'
                         :  * itself is never written to by this function.
                         :  *
                         :  * The copy buffer is allocated on first use in TopMemoryContext and kept
                         :  * in a static pointer, so every later call that computes a checksum
                         :  * overwrites the buffer returned by the previous one.
    1508                 :  */
    1509                 : char *
    1510          725820 : PageSetChecksumCopy(Page page, BlockNumber blkno)
    1511                 : {
    1512                 :     static char *pageCopy = NULL;
    1513                 : 
    1514                 :     /* If we don't need a checksum, just return the passed-in data */
    1515          725820 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1516          724440 :         return (char *) page;
    1517                 : 
    1518                 :     /*
    1519                 :      * We allocate the copy space once and use it over on each subsequent
    1520                 :      * call.  The point of palloc'ing here, rather than having a static char
    1521                 :      * array, is first to ensure adequate alignment for the checksumming code
    1522                 :      * and second to avoid wasting space in processes that never call this.
    1523                 :      */
    1524            1380 :     if (pageCopy == NULL)
    1525 GNC          10 :         pageCopy = MemoryContextAllocAligned(TopMemoryContext,
    1526                 :                                              BLCKSZ,
    1527                 :                                              PG_IO_ALIGN_SIZE,
    1528                 :                                              0);
    1529                 : 
    1530 GIC        1380 :     memcpy(pageCopy, (char *) page, BLCKSZ);
    1531            1380 :     ((PageHeader) pageCopy)->pd_checksum = pg_checksum_page(pageCopy, blkno);
    1532            1380 :     return pageCopy;
    1533 ECB             : }
    1534                 : 
    1535                 : /*
    1536                 :  * Set checksum for a page in private memory.
    1537                 :  *
    1538                 :  * This must only be used when we know that no other process can be modifying
    1539                 :  * the page buffer.
                         :  *
                         :  * The checksum computed by pg_checksum_page for 'page' and 'blkno' is
                         :  * stored directly into the page's pd_checksum field.  Nothing is done if
                         :  * the page is new or data checksums are not enabled.
    1540                 :  */
    1541                 : void
    1542 GIC      133657 : PageSetChecksumInplace(Page page, BlockNumber blkno)
    1543                 : {
    1544                 :     /* If we don't need a checksum, just return */
    1545 CBC      133657 :     if (PageIsNew(page) || !DataChecksumsEnabled())
    1546 GIC      133301 :         return;
    1547                 : 
    1548 CBC         356 :     ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
    1549 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a