LCOV - differential code coverage report
Current view: top level - src/backend/access/brin - brin_pageops.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 77.6 % 294 228 1 17 48 2 25 201 16 26
Current Date: 2023-04-08 15:15:32 Functions: 90.9 % 11 10 1 1 1 8 1 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit | not hit

           TLA  Line data    Source code
       1                 : /*
       2                 :  * brin_pageops.c
       3                 :  *      Page-handling routines for BRIN indexes
       4                 :  *
       5                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       6                 :  * Portions Copyright (c) 1994, Regents of the University of California
       7                 :  *
       8                 :  * IDENTIFICATION
       9                 :  *    src/backend/access/brin/brin_pageops.c
      10                 :  */
      11                 : #include "postgres.h"
      12                 : 
      13                 : #include "access/brin_page.h"
      14                 : #include "access/brin_pageops.h"
      15                 : #include "access/brin_revmap.h"
      16                 : #include "access/brin_xlog.h"
      17                 : #include "access/xloginsert.h"
      18                 : #include "miscadmin.h"
      19                 : #include "storage/bufmgr.h"
      20                 : #include "storage/freespace.h"
      21                 : #include "storage/lmgr.h"
      22                 : #include "storage/smgr.h"
      23                 : #include "utils/rel.h"
      24                 : 
      25                 : /*
      26                 :  * Maximum size of an entry in a BRIN_PAGETYPE_REGULAR page.  We can tolerate
      27                 :  * a single item per page, unlike other index AMs.
      28                 :  */
      29                 : #define BrinMaxItemSize \
      30                 :     MAXALIGN_DOWN(BLCKSZ - \
      31                 :                   (MAXALIGN(SizeOfPageHeaderData + \
      32                 :                             sizeof(ItemIdData)) + \
      33                 :                    MAXALIGN(sizeof(BrinSpecialSpace))))
      34                 : 
      35                 : static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
      36                 :                                    bool *extended);
      37                 : static Size br_page_get_freespace(Page page);
      38                 : static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
      39                 : 
      40                 : 
      41                 : /*
      42                 :  * Update tuple origtup (size origsz), located in offset oldoff of buffer
      43                 :  * oldbuf, to newtup (size newsz) as summary tuple for the page range starting
      44                 :  * at heapBlk.  oldbuf must not be locked on entry, and is not locked at exit.
      45                 :  *
      46                 :  * If samepage is true, attempt to put the new tuple in the same page, but if
      47                 :  * there's no room, use some other one.
      48                 :  *
      49                 :  * If the update is successful, return true; the revmap is updated to point to
      50                 :  * the new tuple.  If the update is not done for whatever reason, return false.
      51                 :  * Caller may retry the update if this happens.
      52                 :  */
      53                 : bool
      54 CBC        3436 : brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
      55                 :               BrinRevmap *revmap, BlockNumber heapBlk,
      56                 :               Buffer oldbuf, OffsetNumber oldoff,
      57                 :               const BrinTuple *origtup, Size origsz,
      58                 :               const BrinTuple *newtup, Size newsz,
      59                 :               bool samepage)
      60                 : {
      61                 :     Page        oldpage;
      62                 :     ItemId      oldlp;
      63                 :     BrinTuple  *oldtup;
      64                 :     Size        oldsz;
      65                 :     Buffer      newbuf;
      66            3436 :     BlockNumber newblk = InvalidBlockNumber;
      67                 :     bool        extended;
      68                 : 
      69            3436 :     Assert(newsz == MAXALIGN(newsz));
      70                 : 
      71                 :     /* If the item is oversized, don't bother. */
      72            3436 :     if (newsz > BrinMaxItemSize)
      73                 :     {
      74 UBC           0 :         ereport(ERROR,
      75                 :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
      76                 :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
      77                 :                         newsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
      78                 :         return false;           /* keep compiler quiet */
      79                 :     }
      80                 : 
      81                 :     /* make sure the revmap is long enough to contain the entry we need */
      82 CBC        3436 :     brinRevmapExtend(revmap, heapBlk);
      83                 : 
      84            3436 :     if (!samepage)
      85                 :     {
      86                 :         /* need a page on which to put the item */
      87             307 :         newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
      88             307 :         if (!BufferIsValid(newbuf))
      89                 :         {
      90 UBC           0 :             Assert(!extended);
      91               0 :             return false;
      92                 :         }
      93                 : 
      94                 :         /*
      95                 :          * Note: it's possible (though unlikely) that the returned newbuf is
      96                 :          * the same as oldbuf, if brin_getinsertbuffer determined that the old
      97                 :          * buffer does in fact have enough space.
      98                 :          */
      99 CBC         307 :         if (newbuf == oldbuf)
     100                 :         {
     101 UBC           0 :             Assert(!extended);
     102               0 :             newbuf = InvalidBuffer;
     103                 :         }
     104                 :         else
     105 CBC         307 :             newblk = BufferGetBlockNumber(newbuf);
     106                 :     }
     107                 :     else
     108                 :     {
     109            3129 :         LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     110            3129 :         newbuf = InvalidBuffer;
     111            3129 :         extended = false;
     112                 :     }
     113            3436 :     oldpage = BufferGetPage(oldbuf);
     114            3436 :     oldlp = PageGetItemId(oldpage, oldoff);
     115                 : 
     116                 :     /*
     117                 :      * Check that the old tuple wasn't updated concurrently: it might have
     118                 :      * moved someplace else entirely, and for that matter the whole page
     119                 :      * might've become a revmap page.  Note that in the first two cases
     120                 :      * checked here, the "oldlp" we just calculated is garbage; but
     121                 :      * PageGetItemId() is simple enough that it was safe to do that
     122                 :      * calculation anyway.
     123                 :      */
     124            6872 :     if (!BRIN_IS_REGULAR_PAGE(oldpage) ||
     125            3436 :         oldoff > PageGetMaxOffsetNumber(oldpage) ||
     126            3436 :         !ItemIdIsNormal(oldlp))
     127                 :     {
     128 UBC           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     129                 : 
     130                 :         /*
     131                 :          * If this happens, and the new buffer was obtained by extending the
     132                 :          * relation, then we need to ensure we don't leave it uninitialized or
     133                 :          * forget about it.
     134                 :          */
     135               0 :         if (BufferIsValid(newbuf))
     136                 :         {
     137               0 :             if (extended)
     138               0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     139               0 :             UnlockReleaseBuffer(newbuf);
     140               0 :             if (extended)
     141               0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     142                 :         }
     143               0 :         return false;
     144                 :     }
     145                 : 
     146 CBC        3436 :     oldsz = ItemIdGetLength(oldlp);
     147            3436 :     oldtup = (BrinTuple *) PageGetItem(oldpage, oldlp);
     148                 : 
     149                 :     /*
     150                 :      * ... or it might have been updated in place to different contents.
     151                 :      */
     152            3436 :     if (!brin_tuples_equal(oldtup, oldsz, origtup, origsz))
     153                 :     {
     154 UBC           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     155               0 :         if (BufferIsValid(newbuf))
     156                 :         {
     157                 :             /* As above, initialize and record new page if we got one */
     158               0 :             if (extended)
     159               0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     160               0 :             UnlockReleaseBuffer(newbuf);
     161               0 :             if (extended)
     162               0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     163                 :         }
     164               0 :         return false;
     165                 :     }
     166                 : 
     167                 :     /*
     168                 :      * Great, the old tuple is intact.  We can proceed with the update.
     169                 :      *
     170                 :      * If there's enough room in the old page for the new tuple, replace it.
     171                 :      *
     172                 :      * Note that there might now be enough space on the page even though the
     173                 :      * caller told us there isn't, if a concurrent update moved another tuple
     174                 :      * elsewhere or replaced a tuple with a smaller one.
     175                 :      */
     176 CBC        6581 :     if (((BrinPageFlags(oldpage) & BRIN_EVACUATE_PAGE) == 0) &&
     177            3145 :         brin_can_do_samepage_update(oldbuf, origsz, newsz))
     178                 :     {
     179            3129 :         START_CRIT_SECTION();
     180            3129 :         if (!PageIndexTupleOverwrite(oldpage, oldoff, (Item) unconstify(BrinTuple *, newtup), newsz))
     181 UBC           0 :             elog(ERROR, "failed to replace BRIN tuple");
     182 CBC        3129 :         MarkBufferDirty(oldbuf);
     183                 : 
     184                 :         /* XLOG stuff */
     185            3129 :         if (RelationNeedsWAL(idxrel))
     186                 :         {
     187                 :             xl_brin_samepage_update xlrec;
     188                 :             XLogRecPtr  recptr;
     189            3126 :             uint8       info = XLOG_BRIN_SAMEPAGE_UPDATE;
     190                 : 
     191            3126 :             xlrec.offnum = oldoff;
     192                 : 
     193            3126 :             XLogBeginInsert();
     194            3126 :             XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
     195                 : 
     196            3126 :             XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
     197            3126 :             XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
     198                 : 
     199            3126 :             recptr = XLogInsert(RM_BRIN_ID, info);
     200                 : 
     201            3126 :             PageSetLSN(oldpage, recptr);
     202                 :         }
     203                 : 
     204            3129 :         END_CRIT_SECTION();
     205                 : 
     206            3129 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     207                 : 
     208            3129 :         if (BufferIsValid(newbuf))
     209                 :         {
     210                 :             /* As above, initialize and record new page if we got one */
     211 UBC           0 :             if (extended)
     212               0 :                 brin_initialize_empty_new_buffer(idxrel, newbuf);
     213               0 :             UnlockReleaseBuffer(newbuf);
     214               0 :             if (extended)
     215               0 :                 FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     216                 :         }
     217                 : 
     218 CBC        3129 :         return true;
     219                 :     }
     220             307 :     else if (newbuf == InvalidBuffer)
     221                 :     {
     222                 :         /*
     223                 :          * Not enough space, but caller said that there was. Tell them to
     224                 :          * start over.
     225                 :          */
     226 UBC           0 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     227               0 :         return false;
     228                 :     }
     229                 :     else
     230                 :     {
     231                 :         /*
     232                 :          * Not enough free space on the oldpage. Put the new tuple on the new
     233                 :          * page, and update the revmap.
     234                 :          */
     235 CBC         307 :         Page        newpage = BufferGetPage(newbuf);
     236                 :         Buffer      revmapbuf;
     237                 :         ItemPointerData newtid;
     238                 :         OffsetNumber newoff;
     239             307 :         Size        freespace = 0;
     240                 : 
     241             307 :         revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     242                 : 
     243             307 :         START_CRIT_SECTION();
     244                 : 
     245                 :         /*
     246                 :          * We need to initialize the page if it's newly obtained.  Note we
     247                 :          * will WAL-log the initialization as part of the update, so we don't
     248                 :          * need to do that here.
     249                 :          */
     250             307 :         if (extended)
     251              11 :             brin_page_init(newpage, BRIN_PAGETYPE_REGULAR);
     252                 : 
     253             307 :         PageIndexTupleDeleteNoCompact(oldpage, oldoff);
     254             307 :         newoff = PageAddItem(newpage, (Item) unconstify(BrinTuple *, newtup), newsz,
     255                 :                              InvalidOffsetNumber, false, false);
     256             307 :         if (newoff == InvalidOffsetNumber)
     257 UBC           0 :             elog(ERROR, "failed to add BRIN tuple to new page");
     258 CBC         307 :         MarkBufferDirty(oldbuf);
     259             307 :         MarkBufferDirty(newbuf);
     260                 : 
     261                 :         /* needed to update FSM below */
     262             307 :         if (extended)
     263              11 :             freespace = br_page_get_freespace(newpage);
     264                 : 
     265             307 :         ItemPointerSet(&newtid, newblk, newoff);
     266             307 :         brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
     267             307 :         MarkBufferDirty(revmapbuf);
     268                 : 
     269                 :         /* XLOG stuff */
     270             307 :         if (RelationNeedsWAL(idxrel))
     271                 :         {
     272                 :             xl_brin_update xlrec;
     273                 :             XLogRecPtr  recptr;
     274                 :             uint8       info;
     275                 : 
     276             307 :             info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     277                 : 
     278             307 :             xlrec.insert.offnum = newoff;
     279             307 :             xlrec.insert.heapBlk = heapBlk;
     280             307 :             xlrec.insert.pagesPerRange = pagesPerRange;
     281             307 :             xlrec.oldOffnum = oldoff;
     282                 : 
     283             307 :             XLogBeginInsert();
     284                 : 
     285                 :             /* new page */
     286             307 :             XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
     287                 : 
     288             307 :             XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     289             307 :             XLogRegisterBufData(0, (char *) unconstify(BrinTuple *, newtup), newsz);
     290                 : 
     291                 :             /* revmap page */
     292             307 :             XLogRegisterBuffer(1, revmapbuf, 0);
     293                 : 
     294                 :             /* old page */
     295             307 :             XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
     296                 : 
     297             307 :             recptr = XLogInsert(RM_BRIN_ID, info);
     298                 : 
     299             307 :             PageSetLSN(oldpage, recptr);
     300             307 :             PageSetLSN(newpage, recptr);
     301             307 :             PageSetLSN(BufferGetPage(revmapbuf), recptr);
     302                 :         }
     303                 : 
     304             307 :         END_CRIT_SECTION();
     305                 : 
     306             307 :         LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     307             307 :         LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     308             307 :         UnlockReleaseBuffer(newbuf);
     309                 : 
     310             307 :         if (extended)
     311                 :         {
     312              11 :             RecordPageWithFreeSpace(idxrel, newblk, freespace);
     313              11 :             FreeSpaceMapVacuumRange(idxrel, newblk, newblk + 1);
     314                 :         }
     315                 : 
     316             307 :         return true;
     317                 :     }
     318                 : }
     319                 : 
     320                 : /*
     321                 :  * Return whether brin_doupdate can do a samepage update.
     322                 :  */
     323                 : bool
     324            6290 : brin_can_do_samepage_update(Buffer buffer, Size origsz, Size newsz)
     325                 : {
     326                 :     return
     327            9930 :         ((newsz <= origsz) ||
     328            3640 :          PageGetExactFreeSpace(BufferGetPage(buffer)) >= (newsz - origsz));
     329                 : }
     330                 : 
     331                 : /*
     332                 :  * Insert an index tuple into the index relation.  The revmap is updated to
     333                 :  * mark the range containing the given page as pointing to the inserted entry.
     334                 :  * A WAL record is written.
     335                 :  *
     336                 :  * The buffer, if valid, is first checked for free space to insert the new
     337                 :  * entry; if there isn't enough, a new buffer is obtained and pinned.  No
     338                 :  * buffer lock must be held on entry, no buffer lock is held on exit.
     339                 :  *
     340                 :  * Return value is the offset number where the tuple was inserted.
     341                 :  */
     342                 : OffsetNumber
     343            2591 : brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
     344                 :               BrinRevmap *revmap, Buffer *buffer, BlockNumber heapBlk,
     345                 :               BrinTuple *tup, Size itemsz)
     346                 : {
     347                 :     Page        page;
     348                 :     BlockNumber blk;
     349                 :     OffsetNumber off;
     350            2591 :     Size        freespace = 0;
     351                 :     Buffer      revmapbuf;
     352                 :     ItemPointerData tid;
     353                 :     bool        extended;
     354                 : 
     355            2591 :     Assert(itemsz == MAXALIGN(itemsz));
     356                 : 
     357                 :     /* If the item is oversized, don't even bother. */
     358            2591 :     if (itemsz > BrinMaxItemSize)
     359                 :     {
     360 UBC           0 :         ereport(ERROR,
     361                 :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     362                 :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     363                 :                         itemsz, BrinMaxItemSize, RelationGetRelationName(idxrel))));
     364                 :         return InvalidOffsetNumber; /* keep compiler quiet */
     365                 :     }
     366                 : 
     367                 :     /* Make sure the revmap is long enough to contain the entry we need */
     368 CBC        2591 :     brinRevmapExtend(revmap, heapBlk);
     369                 : 
     370                 :     /*
     371                 :      * Acquire lock on buffer supplied by caller, if any.  If it doesn't have
     372                 :      * enough space, unpin it to obtain a new one below.
     373                 :      */
     374            2591 :     if (BufferIsValid(*buffer))
     375                 :     {
     376                 :         /*
     377                 :          * It's possible that another backend (or ourselves!) extended the
     378                 :          * revmap over the page we held a pin on, so we cannot assume that
     379                 :          * it's still a regular page.
     380                 :          */
     381            1005 :         LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE);
     382            1005 :         if (br_page_get_freespace(BufferGetPage(*buffer)) < itemsz)
     383                 :         {
     384              57 :             UnlockReleaseBuffer(*buffer);
     385              57 :             *buffer = InvalidBuffer;
     386                 :         }
     387                 :     }
     388                 : 
     389                 :     /*
     390                 :      * If we still don't have a usable buffer, have brin_getinsertbuffer
     391                 :      * obtain one for us.
     392                 :      */
     393            2591 :     if (!BufferIsValid(*buffer))
     394                 :     {
     395                 :         do
     396            1643 :             *buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
     397            1643 :         while (!BufferIsValid(*buffer));
     398                 :     }
     399                 :     else
     400             948 :         extended = false;
     401                 : 
     402                 :     /* Now obtain lock on revmap buffer */
     403            2591 :     revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
     404                 : 
     405            2591 :     page = BufferGetPage(*buffer);
     406            2591 :     blk = BufferGetBlockNumber(*buffer);
     407                 : 
     408                 :     /* Execute the actual insertion */
     409            2591 :     START_CRIT_SECTION();
     410            2591 :     if (extended)
     411             176 :         brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     412            2591 :     off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
     413                 :                       false, false);
     414            2591 :     if (off == InvalidOffsetNumber)
     415 UBC           0 :         elog(ERROR, "failed to add BRIN tuple to new page");
     416 CBC        2591 :     MarkBufferDirty(*buffer);
     417                 : 
     418                 :     /* needed to update FSM below */
     419            2591 :     if (extended)
     420             176 :         freespace = br_page_get_freespace(page);
     421                 : 
     422            2591 :     ItemPointerSet(&tid, blk, off);
     423            2591 :     brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, tid);
     424            2591 :     MarkBufferDirty(revmapbuf);
     425                 : 
     426                 :     /* XLOG stuff */
     427            2591 :     if (RelationNeedsWAL(idxrel))
     428                 :     {
     429                 :         xl_brin_insert xlrec;
     430                 :         XLogRecPtr  recptr;
     431                 :         uint8       info;
     432                 : 
     433            2189 :         info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
     434            2189 :         xlrec.heapBlk = heapBlk;
     435            2189 :         xlrec.pagesPerRange = pagesPerRange;
     436            2189 :         xlrec.offnum = off;
     437                 : 
     438            2189 :         XLogBeginInsert();
     439            2189 :         XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
     440                 : 
     441            2189 :         XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
     442            2189 :         XLogRegisterBufData(0, (char *) tup, itemsz);
     443                 : 
     444            2189 :         XLogRegisterBuffer(1, revmapbuf, 0);
     445                 : 
     446            2189 :         recptr = XLogInsert(RM_BRIN_ID, info);
     447                 : 
     448            2189 :         PageSetLSN(page, recptr);
     449            2189 :         PageSetLSN(BufferGetPage(revmapbuf), recptr);
     450                 :     }
     451                 : 
     452            2591 :     END_CRIT_SECTION();
     453                 : 
     454                 :     /* Tuple is firmly on buffer; we can release our locks */
     455            2591 :     LockBuffer(*buffer, BUFFER_LOCK_UNLOCK);
     456            2591 :     LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
     457                 : 
     458                 :     BRIN_elog((DEBUG2, "inserted tuple (%u,%u) for range starting at %u",
     459                 :                blk, off, heapBlk));
     460                 : 
     461            2591 :     if (extended)
     462                 :     {
     463             176 :         RecordPageWithFreeSpace(idxrel, blk, freespace);
     464             176 :         FreeSpaceMapVacuumRange(idxrel, blk, blk + 1);
     465                 :     }
     466                 : 
     467            2591 :     return off;
     468                 : }
     469                 : 
     470                 : /*
     471                 :  * Initialize a page with the given type.
     472                 :  *
     473                 :  * Caller is responsible for marking it dirty, as appropriate.
     474                 :  */
     475                 : void
     476             516 : brin_page_init(Page page, uint16 type)
     477                 : {
     478             516 :     PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace));
     479                 : 
     480             516 :     BrinPageType(page) = type;
     481             516 : }
     482                 : 
     483                 : /*
     484                 :  * Initialize a new BRIN index's metapage.
     485                 :  */
     486                 : void
     487             144 : brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version)
     488                 : {
     489                 :     BrinMetaPageData *metadata;
     490                 : 
     491             144 :     brin_page_init(page, BRIN_PAGETYPE_META);
     492                 : 
     493             144 :     metadata = (BrinMetaPageData *) PageGetContents(page);
     494                 : 
     495             144 :     metadata->brinMagic = BRIN_META_MAGIC;
     496             144 :     metadata->brinVersion = version;
     497             144 :     metadata->pagesPerRange = pagesPerRange;
     498                 : 
     499                 :     /*
     500                 :      * Note we cheat here a little.  0 is not a valid revmap block number
     501                 :      * (because it's the metapage buffer), but doing this enables the first
     502                 :      * revmap page to be created when the index is.
     503                 :      */
     504             144 :     metadata->lastRevmapPage = 0;
     505                 : 
     506                 :     /*
     507                 :      * Set pd_lower just past the end of the metadata.  This is essential,
     508                 :      * because without doing so, metadata will be lost if xlog.c compresses
     509                 :      * the page.
     510                 :      */
     511             144 :     ((PageHeader) page)->pd_lower =
     512             144 :         ((char *) metadata + sizeof(BrinMetaPageData)) - (char *) page;
     513             144 : }
     514                 : 
     515                 : /*
     516                 :  * Initiate page evacuation protocol.
     517                 :  *
     518                 :  * The page must be locked in exclusive mode by the caller.  idxRel is unused.
     519                 :  *
     520                 :  * If the page is not yet initialized, or has no used line pointers, return
     521                 :  * false without doing anything; it can be used for revmap without any further
     522                 :  * changes.  If it contains tuples, set BRIN_EVACUATE_PAGE and return true.
     523                 :  */
     524                 : bool
     525             121 : brin_start_evacuating_page(Relation idxRel, Buffer buf)
     526                 : {
     527                 :     OffsetNumber off;
     528                 :     OffsetNumber maxoff;
     529                 :     Page        page;
     530                 : 
     531             121 :     page = BufferGetPage(buf);
     532                 : 
     533             121 :     if (PageIsNew(page))
     534             119 :         return false;       /* uninitialized page: nothing to evacuate */
     535                 : 
     536               2 :     maxoff = PageGetMaxOffsetNumber(page);
     537             292 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     538                 :     {
     539                 :         ItemId      lp;
     540                 : 
     541             291 :         lp = PageGetItemId(page, off);
     542             291 :         if (ItemIdIsUsed(lp))
     543                 :         {
     544                 :             /*
     545                 :              * Prevent other backends from adding more tuples to this page:
     546                 :              * BRIN_EVACUATE_PAGE makes br_page_get_freespace report zero free
     547                 :              * space, so the page is never chosen for new tuples.  Note that
     548                 :              * this flag is not WAL-logged, except accidentally.
     549                 :              */
     550               1 :             BrinPageFlags(page) |= BRIN_EVACUATE_PAGE;
     551               1 :             MarkBufferDirtyHint(buf, true);
     552                 : 
     553               1 :             return true;    /* one used item is enough; stop scanning */
     554                 :         }
     555                 :     }
     556               1 :     return false;           /* no used line pointer was found */
     557                 : }
     558                 : 
     559                 : /*
     560                 :  * Move all tuples out of a page that has BRIN_EVACUATE_PAGE set.
     561                 :  *
     562                 :  * The caller must hold a lock on the page.  The lock and pin are released.
     563                 :  */
     564                 : void
     565               1 : brin_evacuate_page(Relation idxRel, BlockNumber pagesPerRange,
     566                 :                    BrinRevmap *revmap, Buffer buf)
     567                 : {
     568                 :     OffsetNumber off;
     569                 :     OffsetNumber maxoff;
     570                 :     Page        page;
     571               1 :     BrinTuple  *btup = NULL;
     572               1 :     Size        btupsz = 0;
     573                 : 
     574               1 :     page = BufferGetPage(buf);
     575                 : 
     576               1 :     Assert(BrinPageFlags(page) & BRIN_EVACUATE_PAGE);
     577                 : 
     578               1 :     maxoff = PageGetMaxOffsetNumber(page);
     579             292 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
     580                 :     {
     581                 :         BrinTuple  *tup;
     582                 :         Size        sz;
     583                 :         ItemId      lp;
     584                 : 
     585             291 :         CHECK_FOR_INTERRUPTS();
     586                 : 
     587             291 :         lp = PageGetItemId(page, off);
     588             291 :         if (ItemIdIsUsed(lp))
     589                 :         {
     590             291 :             sz = ItemIdGetLength(lp);
     591             291 :             tup = (BrinTuple *) PageGetItem(page, lp);
     592             291 :             tup = brin_copy_tuple(tup, sz, btup, &btupsz);    /* NOTE(review): btup is never reassigned here, so it is always NULL */
     593                 : 
     594             291 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);    /* the copy made above is what gets used while unlocked */
     595                 : 
     596             291 :             if (!brin_doupdate(idxRel, pagesPerRange, revmap, tup->bt_blkno,
     597                 :                                buf, off, tup, sz, tup, sz, false))    /* tup/sz passed twice; presumably as old and new tuple -- confirm in brin_doupdate */
     598 UBC           0 :                 off--;          /* retry this offset: the loop's off++ undoes the decrement */
     599                 : 
     600 CBC         291 :             LockBuffer(buf, BUFFER_LOCK_SHARE);
     601                 : 
     602                 :             /* It's possible that someone extended the revmap over this page; if so, stop */
     603             291 :             if (!BRIN_IS_REGULAR_PAGE(page))
     604 UBC           0 :                 break;
     605                 :         }
     606                 :     }
     607                 : 
     608 CBC           1 :     UnlockReleaseBuffer(buf);
     609               1 : }
     610                 : 
     611                 : /*
     612                 :  * Given a BRIN index page, initialize it if necessary, and record its
     613                 :  * current free space in the FSM.
     614                 :  *
     615                 :  * The main use for this is when, during vacuuming, an uninitialized page is
     616                 :  * found, which could be the result of relation extension followed by a crash
     617                 :  * before the page can be used.
     618                 :  *
     619                 :  * Here, we don't bother to update upper FSM pages, instead expecting that our
     620                 :  * caller (brin_vacuum_scan) will fix them at the end of the scan.  Elsewhere
     621                 :  * in this file, it's generally a good idea to propagate additions of free
     622                 :  * space into the upper FSM pages immediately.
     623                 :  */
     624                 : void
     625             183 : brin_page_cleanup(Relation idxrel, Buffer buf)
     626                 : {
     627             183 :     Page        page = BufferGetPage(buf);
     628                 : 
     629                 :     /*
     630                 :      * If a page was left uninitialized, initialize it now; also record it in
     631                 :      * FSM.
     632                 :      *
     633                 :      * Somebody else might be extending the relation concurrently.  To avoid
     634                 :      * re-initializing the page before they can grab the buffer lock, we
     635                 :      * acquire the extension lock momentarily.  Since they hold the extension
     636                 :      * lock from before getting the page until after it's been initialized,
     637                 :      * we're sure to see their initialization.
     638                 :      */
     639             183 :     if (PageIsNew(page))
     640                 :     {
     641 UBC           0 :         LockRelationForExtension(idxrel, ShareLock);
     642               0 :         UnlockRelationForExtension(idxrel, ShareLock);
     643                 : 
     644               0 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     645               0 :         if (PageIsNew(page))    /* recheck now that we hold the buffer lock */
     646                 :         {
     647               0 :             brin_initialize_empty_new_buffer(idxrel, buf);
     648               0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     649               0 :             return;     /* the call above already recorded the page in FSM */
     650                 :         }
     651               0 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     652                 :     }
     653                 : 
     654                 :     /* Nothing to be done for non-regular index pages (metapage or revmap) */
     655 CBC         183 :     if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
     656             141 :         BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
     657              84 :         return;
     658                 : 
     659                 :     /* Measure free space and record it (no buffer lock is taken here) */
     660              99 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf),
     661                 :                             br_page_get_freespace(page));
     662                 : }
     663                 : 
     664                 : /*
     665                 :  * Return a pinned and exclusively locked buffer which can be used to insert an
     666                 :  * index item of size itemsz (caller must ensure not to request sizes
     667                 :  * impossible to fulfill).  If oldbuf is a valid buffer, it is also locked (in
     668                 :  * an order determined to avoid deadlocks: lower block number first).
     669                 :  *
     670                 :  * If we find that the old page is no longer a regular index page (because
     671                 :  * of a revmap extension), the old buffer is unlocked and we return
     672                 :  * InvalidBuffer.
     673                 :  *
     674                 :  * If there's no existing page with enough free space to accommodate the new
     675                 :  * item, the relation is extended.  If this happens, *extended is set to true,
     676                 :  * and it is the caller's responsibility to initialize the page (and WAL-log
     677                 :  * that fact) prior to use.  The caller should also update the FSM with the
     678                 :  * page's remaining free space after the insertion.
     679                 :  *
     680                 :  * Note that the caller is not expected to update FSM unless *extended is set
     681                 :  * true.  This policy means that we'll update FSM when a page is created, and
     682                 :  * when it's found to have too little space for a desired tuple insertion,
     683                 :  * but not every single time we add a tuple to the page.
     684                 :  *
     685                 :  * Note that in some corner cases it is possible for this routine to extend
     686                 :  * the relation and then not return the new page.  It is this routine's
     687                 :  * responsibility to WAL-log the page initialization and to record the page in
     688                 :  * FSM if that happens, since the caller certainly can't do it.
     689                 :  */
     690                 : static Buffer
     691            1950 : brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
     692                 :                      bool *extended)
     693                 : {
     694                 :     BlockNumber oldblk;
     695                 :     BlockNumber newblk;
     696                 :     Page        page;
     697                 :     Size        freespace;
     698                 : 
     699                 :     /* callers must have checked */
     700            1950 :     Assert(itemsz <= BrinMaxItemSize);
     701                 : 
     702            1950 :     if (BufferIsValid(oldbuf))
     703             307 :         oldblk = BufferGetBlockNumber(oldbuf);
     704                 :     else
     705            1643 :         oldblk = InvalidBlockNumber;
     706                 : 
     707                 :     /* Choose initial target page, re-using existing target if known */
     708            1950 :     newblk = RelationGetTargetBlock(irel);
     709            1950 :     if (newblk == InvalidBlockNumber)
     710             150 :         newblk = GetPageWithFreeSpace(irel, itemsz);
     711                 : 
     712                 :     /*
     713                 :      * Loop until we find a page with sufficient free space.  By the time we
     714                 :      * return to caller out of this loop, both buffers are valid and locked;
     715                 :      * if we have to restart here, neither page is locked and newblk isn't
     716                 :      * pinned (if it's even valid).
     717                 :      */
     718                 :     for (;;)
     719              74 :     {
     720                 :         Buffer      buf;
     721            2024 :         bool        extensionLockHeld = false;
     722                 : 
     723            2024 :         CHECK_FOR_INTERRUPTS();
     724                 : 
     725            2024 :         *extended = false;
     726                 : 
     727            2024 :         if (newblk == InvalidBlockNumber)
     728                 :         {
     729                 :             /*
     730                 :              * There's not enough free space in any existing index page,
     731                 :              * according to the FSM: extend the relation to obtain a shiny new
     732                 :              * page.
     733                 :              *
     734                 :              * XXX: It's likely possible to use RBM_ZERO_AND_LOCK here,
     735                 :              * which would avoid the need to hold the extension lock during
     736                 :              * buffer reclaim.
     737                 :              */
     738 GIC         187 :             if (!RELATION_IS_LOCAL(irel))
     739                 :             {
     740              11 :                 LockRelationForExtension(irel, ExclusiveLock);
     741              11 :                 extensionLockHeld = true;
     742 ECB             :             }
     743 GIC         187 :             buf = ReadBuffer(irel, P_NEW);
     744 CBC         187 :             newblk = BufferGetBlockNumber(buf);
     745             187 :             *extended = true;
     746                 : 
     747 ECB             :             BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
     748                 :                        BufferGetBlockNumber(buf)));
     749                 :         }
     750 GIC        1837 :         else if (newblk == oldblk)
     751                 :         {
     752                 :             /*
     753                 :              * There's an odd corner-case here where the FSM is out-of-date,
     754 ECB             :              * and gave us the old page.  Reuse the caller's buffer and pin.
     755                 :              */
     756 GIC          13 :             buf = oldbuf;
     757                 :         }
     758                 :         else
     759                 :         {
     760 CBC        1824 :             buf = ReadBuffer(irel, newblk);
     761                 :         }
     762                 : 
     763                 :         /*
     764 ECB             :          * We lock the old buffer first, if it's earlier than the new one; but
     765                 :          * then we need to check that it hasn't been turned into a revmap page
     766                 :          * concurrently.  If we detect that that happened, give up and tell
     767                 :          * caller to start over.
     768                 :          */
     769 GIC        2024 :         if (BufferIsValid(oldbuf) && oldblk < newblk)
     770                 :         {
     771             311 :             LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     772             311 :             if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
     773 ECB             :             {
     774 UIC           0 :                 LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     775 ECB             : 
     776                 :                 /*
     777                 :                  * It is possible that the new page was obtained from
     778 EUB             :                  * extending the relation.  In that case, we must be sure to
     779                 :                  * record it in the FSM before leaving, because otherwise the
     780                 :                  * space would be lost forever.  However, we cannot let an
     781                 :                  * uninitialized page get in the FSM, so we need to initialize
     782                 :                  * it first.
     783                 :                  */
     784 UIC           0 :                 if (*extended)
     785               0 :                     brin_initialize_empty_new_buffer(irel, buf);
     786                 : 
     787               0 :                 if (extensionLockHeld)
     788 UBC           0 :                     UnlockRelationForExtension(irel, ExclusiveLock);
     789 EUB             : 
     790 UIC           0 :                 ReleaseBuffer(buf);     /* buf was pinned but not locked on this path */
     791 EUB             : 
     792 UBC           0 :                 if (*extended)
     793                 :                 {
     794               0 :                     FreeSpaceMapVacuumRange(irel, newblk, newblk + 1);
     795                 :                     /* shouldn't matter, but don't confuse caller */
     796               0 :                     *extended = false;
     797                 :                 }
     798 EUB             : 
     799 UIC           0 :                 return InvalidBuffer;
     800 EUB             :             }
     801                 :         }
     802                 : 
     803 GBC        2024 :         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
     804                 : 
     805 GIC        2024 :         if (extensionLockHeld)
     806              11 :             UnlockRelationForExtension(irel, ExclusiveLock);
     807 ECB             : 
     808 GIC        2024 :         page = BufferGetPage(buf);
     809 ECB             : 
     810                 :         /*
     811                 :          * We have a new buffer to insert into.  Check that the new page has
     812                 :          * enough free space, and return it if it does; otherwise start over.
     813                 :          * (br_page_get_freespace also checks that the FSM didn't hand us a
     814                 :          * page that has since been repurposed for the revmap.)  A page we
     815                 :          * just extended is credited with BrinMaxItemSize without inspecting it.
     816 GIC        4048 :          */        freespace = *extended ?
     817            2024 :             BrinMaxItemSize : br_page_get_freespace(page);
     818            2024 :         if (freespace >= itemsz)
     819                 :         {
     820 CBC        1950 :             RelationSetTargetBlock(irel, newblk);
     821 ECB             : 
     822                 :             /*
     823                 :              * Lock the old buffer if not locked already.  Note that in this
     824                 :              * case we know for sure it's a regular page: it's later than the
     825                 :              * new page we just got, which is not a revmap page, and revmap
     826                 :              * pages are always consecutive.
     827                 :              */
     828 GIC        1950 :             if (BufferIsValid(oldbuf) && oldblk > newblk)
     829                 :             {
     830 UIC           0 :                 LockBuffer(oldbuf, BUFFER_LOCK_EXCLUSIVE);
     831               0 :                 Assert(BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)));
     832 ECB             :             }
     833                 : 
     834 GBC        1950 :             return buf;
     835 EUB             :         }
     836                 : 
     837                 :         /* This page is no good. */
     838 ECB             : 
     839                 :         /*
     840                 :          * If an entirely new page does not contain enough free space for the
     841                 :          * new item, then surely that item is oversized.  Complain loudly; but
     842                 :          * first make sure we initialize the page and record it as free, for
     843                 :          * next time.
     844                 :          */
     845 GIC          74 :         if (*extended)
     846                 :         {
     847 UIC           0 :             brin_initialize_empty_new_buffer(irel, buf);
     848                 :             /* since this should not happen, skip FreeSpaceMapVacuum */
     849 ECB             : 
     850 UIC           0 :             ereport(ERROR,
     851 EUB             :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     852                 :                      errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
     853                 :                             itemsz, freespace, RelationGetRelationName(irel))));
     854                 :             return InvalidBuffer;   /* keep compiler quiet */
     855                 :         }
     856                 : 
     857 GIC          74 :         if (newblk != oldblk)       /* when they match, buf is oldbuf: keep the caller's pin */
     858              61 :             UnlockReleaseBuffer(buf);
     859              74 :         if (BufferIsValid(oldbuf) && oldblk <= newblk)    /* "==" unlocks the buf == oldbuf case */
     860              17 :             LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
     861 ECB             : 
     862                 :         /*
     863                 :          * Update the FSM with the new, presumably smaller, freespace value
     864                 :          * for this page, then search for a new target page.
     865                 :          */
     866 GIC          74 :         newblk = RecordAndGetPageWithFreeSpace(irel, newblk, freespace, itemsz);
     867                 :     }
     868                 : }
     869                 : 
     870 ECB             : /*
     871                 :  * Initialize a page as an empty regular BRIN page, WAL-log this, and record
     872                 :  * the page in FSM.
     873                 :  *
     874                 :  * There are several corner situations in which we extend the relation to
     875                 :  * obtain a new page and later find that we cannot use it immediately.  When
     876                 :  * that happens, we don't want to let the page go unrecorded in FSM, because
     877                 :  * there is no mechanism to get the space back and the index would bloat.
     878                 :  * Also, because we would not WAL-log the action that would initialize the
     879                 :  * page, the page would go uninitialized in a standby (or after recovery).
     880                 :  *
     881                 :  * While we record the page in FSM here, caller is responsible for doing FSM
     882                 :  * upper-page update if that seems appropriate.
     883                 :  */
     884                 : static void
     885 UIC           0 : brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
     886                 : {
     887                 :     Page        page;
     888                 : 
     889 EUB             :     BRIN_elog((DEBUG2,
     890                 :                "brin_initialize_empty_new_buffer: initializing blank page %u",
     891                 :                BufferGetBlockNumber(buffer)));
     892                 : 
     893 UIC           0 :     START_CRIT_SECTION();       /* init, dirtying and WAL-logging happen as one unit */
     894               0 :     page = BufferGetPage(buffer);
     895               0 :     brin_page_init(page, BRIN_PAGETYPE_REGULAR);
     896               0 :     MarkBufferDirty(buffer);
     897 UBC           0 :     log_newpage_buffer(buffer, true);
     898               0 :     END_CRIT_SECTION();
     899 EUB             : 
     900                 :     /*
     901                 :      * We update the FSM for this page, but this is not WAL-logged.  This is
     902                 :      * acceptable because VACUUM will scan the index and update the FSM with
     903                 :      * pages whose FSM records were forgotten in a crash.
     904                 :      */
     905 UIC           0 :     RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
     906                 :                             br_page_get_freespace(page));
     907               0 : }
     908                 : 
     909 EUB             : 
     910                 : /*
     911                 :  * Return the amount of free space on a regular BRIN index page.
     912                 :  *
     913                 :  * If the page is not a regular page, or has been marked with the
     914                 :  * BRIN_EVACUATE_PAGE flag, returns 0; else returns PageGetFreeSpace(page).
     915                 :  */
     916                 : static Size
     917 GIC        3128 : br_page_get_freespace(Page page)
     918                 : {
     919            3128 :     if (!BRIN_IS_REGULAR_PAGE(page) ||
     920            3128 :         (BrinPageFlags(page) & BRIN_EVACUATE_PAGE) != 0)
     921 LBC           0 :         return 0;       /* never offer this page for new tuples */
     922                 :     else
     923 CBC        3128 :         return PageGetFreeSpace(page);
     924 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a