LCOV - differential code coverage report
Current view:   top level - src/backend/access/gist - gistvacuum.c (source / functions)
Current:        Differential Code Coverage HEAD vs 15
Current Date:   2023-04-08 15:15:32
Baseline:       15
Baseline Date:  2023-04-08 15:09:40

            Coverage   Total    Hit    UBC    CBC
Lines:        58.5 %     195    114     81    114
Functions:    83.3 %       6      5      1      5

Legend: Lines: hit / not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * gistvacuum.c
       4                 :  *    vacuuming routines for the postgres GiST index access method.
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/access/gist/gistvacuum.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/genam.h"
      18                 : #include "access/gist_private.h"
      19                 : #include "access/transam.h"
      20                 : #include "commands/vacuum.h"
      21                 : #include "lib/integerset.h"
      22                 : #include "miscadmin.h"
      23                 : #include "storage/indexfsm.h"
      24                 : #include "storage/lmgr.h"
      25                 : #include "utils/memutils.h"
      26                 : 
      27                 : /* Working state needed by gistbulkdelete */
      28                 : typedef struct
      29                 : {
      30                 :     IndexVacuumInfo *info;
      31                 :     IndexBulkDeleteResult *stats;
      32                 :     IndexBulkDeleteCallback callback;
      33                 :     void       *callback_state;
      34                 :     GistNSN     startNSN;
      35                 : 
      36                 :     /*
      37                 :      * These are used to remember all internal pages and all empty leaf
      38                 :      * pages seen during the scan; they are used for deleting the empty pages.
      39                 :      */
      40                 :     IntegerSet *internal_page_set;
      41                 :     IntegerSet *empty_leaf_set;
      42                 :     MemoryContext page_set_context;
      43                 : } GistVacState;
      44                 : 
      45                 : static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      46                 :                            IndexBulkDeleteCallback callback, void *callback_state);
      47                 : static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno,
      48                 :                            BlockNumber orig_blkno);
      49                 : static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
      50                 :                                           GistVacState *vstate);
      51                 : static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      52                 :                            Buffer parentBuffer, OffsetNumber downlink,
      53                 :                            Buffer leafBuffer);
      54                 : 
      55                 : /*
      56                 :  * VACUUM bulkdelete stage: remove index entries.
      57                 :  */
      58                 : IndexBulkDeleteResult *
      59 CBC          20 : gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
      60                 :                IndexBulkDeleteCallback callback, void *callback_state)
      61                 : {
      62                 :     /* allocate stats if first time through, else re-use existing struct */
      63              20 :     if (stats == NULL)
      64              20 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
      65                 : 
      66              20 :     gistvacuumscan(info, stats, callback, callback_state);
      67                 : 
      68              20 :     return stats;
      69                 : }
      70                 : 
      71                 : /*
      72                 :  * VACUUM cleanup stage: delete empty pages, and update index statistics.
      73                 :  */
      74                 : IndexBulkDeleteResult *
      75              42 : gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
      76                 : {
      77                 :     /* No-op in ANALYZE ONLY mode */
      78              42 :     if (info->analyze_only)
      79               9 :         return stats;
      80                 : 
      81                 :     /*
      82                 :      * If gistbulkdelete was called, we need not do anything, just return the
      83                 :      * stats from the latest gistbulkdelete call.  If it wasn't called, we
      84                 :      * still need to do a pass over the index, to obtain index statistics.
      85                 :      */
      86              33 :     if (stats == NULL)
      87                 :     {
      88              22 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
      89              22 :         gistvacuumscan(info, stats, NULL, NULL);
      90                 :     }
      91                 : 
      92                 :     /*
      93                 :      * It's quite possible for us to be fooled by concurrent page splits into
      94                 :      * double-counting some index tuples, so disbelieve any total that exceeds
      95                 :      * the underlying heap's count ... if we know that accurately.  Otherwise
      96                 :      * this might just make matters worse.
      97                 :      */
      98              33 :     if (!info->estimated_count)
      99                 :     {
     100              33 :         if (stats->num_index_tuples > info->num_heap_tuples)
     101 UBC           0 :             stats->num_index_tuples = info->num_heap_tuples;
     102                 :     }
     103                 : 
     104 CBC          33 :     return stats;
     105                 : }
     106                 : 
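
gistbulkdelete() and gistvacuumcleanup() above are the two VACUUM entry points this
file provides.  As a minimal sketch (not the actual handler), this is roughly how an
index access method hands such callbacks to VACUUM through IndexAmRoutine; the real
GiST wiring is done by gisthandler() in gist.c, and gisthandler_sketch below is a
made-up name showing only the vacuum-related fields.

#include "postgres.h"

#include "access/amapi.h"
#include "access/gist_private.h"
#include "fmgr.h"
#include "nodes/nodes.h"

/*
 * Illustrative only: every field other than the two VACUUM callbacks is
 * omitted.  The real registration is done by gisthandler() in gist.c.
 */
Datum
gisthandler_sketch(PG_FUNCTION_ARGS)
{
    IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);

    /* ... many other amroutine fields are set here in the real handler ... */
    amroutine->ambulkdelete = gistbulkdelete;       /* bulkdelete stage above */
    amroutine->amvacuumcleanup = gistvacuumcleanup; /* cleanup stage above */

    PG_RETURN_POINTER(amroutine);
}
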
     107                 : /*
     108                 :  * gistvacuumscan --- scan the index for VACUUMing purposes
     109                 :  *
     110                 :  * This scans the index for leaf tuples that are deletable according to the
     111                 :  * vacuum callback, and updates the stats.  Both gistbulkdelete and
     112                 :  * gistvacuumcleanup invoke this (the latter only if no gistbulkdelete
     113                 :  * call occurred).
     114                 :  *
     115                 :  * This also makes note of any empty leaf pages, as well as all internal
     116                 :  * pages while looping over all index pages.  After scanning all the pages, we
     117                 :  * remove the empty pages so that they can be reused.  Any deleted pages are
     118                 :  * added directly to the free space map.  (They should've been added there
     119                 :  * when they were originally deleted, already, but it's possible that the FSM
     120                 :  * already when they were originally deleted, but it's possible that the FSM
     121                 :  * was lost in a crash, for example.)
     122                 :  * The caller is responsible for initially allocating/zeroing a stats struct.
     123                 :  */
     124                 : static void
     125              42 : gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     126                 :                IndexBulkDeleteCallback callback, void *callback_state)
     127                 : {
     128              42 :     Relation    rel = info->index;
     129                 :     GistVacState vstate;
     130                 :     BlockNumber num_pages;
     131                 :     bool        needLock;
     132                 :     BlockNumber blkno;
     133                 :     MemoryContext oldctx;
     134                 : 
     135                 :     /*
     136                 :      * Reset fields that track information about the entire index now.  This
     137                 :      * avoids double-counting in the case where a single VACUUM command
     138                 :      * requires multiple scans of the index.
     139                 :      *
     140                 :      * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
     141                 :      * since they track information about the VACUUM command, and so must last
     142                 :      * across each call to gistvacuumscan().
     143                 :      *
     144                 :      * (Note that pages_free is treated as state about the whole index, not
     145                 :      * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
     146                 :      * calls are idempotent, and get repeated for the same deleted pages in
     147                 :      * some scenarios.  The point for us is to track the number of recyclable
     148                 :      * pages in the index at the end of the VACUUM command.)
     149                 :      */
     150              42 :     stats->num_pages = 0;
     151              42 :     stats->estimated_count = false;
     152              42 :     stats->num_index_tuples = 0;
     153              42 :     stats->pages_deleted = 0;
     154              42 :     stats->pages_free = 0;
     155                 : 
     156                 :     /*
     157                 :      * Create the integer sets to remember all the internal and the empty leaf
     158                 :      * pages in page_set_context.  Internally, the integer set will remember
     159                 :      * this context so that the subsequent allocations for these integer sets
     160                 :      * will be done from the same context.
     161                 :      *
     162                 :      * XXX the allocation sizes used below pre-date generation context's block
     163                 :      * growing code.  These values should likely be benchmarked and set to
     164                 :      * more suitable values.
     165                 :      */
     166              42 :     vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
     167                 :                                                       "GiST VACUUM page set context",
     168                 :                                                       16 * 1024,
     169                 :                                                       16 * 1024,
     170                 :                                                       16 * 1024);
     171              42 :     oldctx = MemoryContextSwitchTo(vstate.page_set_context);
     172              42 :     vstate.internal_page_set = intset_create();
     173              42 :     vstate.empty_leaf_set = intset_create();
     174              42 :     MemoryContextSwitchTo(oldctx);
     175                 : 
     176                 :     /* Set up info to pass down to gistvacuumpage */
     177              42 :     vstate.info = info;
     178              42 :     vstate.stats = stats;
     179              42 :     vstate.callback = callback;
     180              42 :     vstate.callback_state = callback_state;
     181              42 :     if (RelationNeedsWAL(rel))
     182              42 :         vstate.startNSN = GetInsertRecPtr();
     183                 :     else
     184 UBC           0 :         vstate.startNSN = gistGetFakeLSN(rel);
     185                 : 
     186                 :     /*
     187                 :      * The outer loop iterates over all index pages, in physical order (we
     188                 :      * hope the kernel will cooperate in providing read-ahead for speed).  It
     189                 :      * is critical that we visit all leaf pages, including ones added after we
     190                 :      * start the scan, else we might fail to delete some deletable tuples.
     191                 :      * Hence, we must repeatedly check the relation length.  We must acquire
     192                 :      * the relation-extension lock while doing so to avoid a race condition:
     193                 :      * if someone else is extending the relation, there is a window where
     194                 :      * bufmgr/smgr have created a new all-zero page but it hasn't yet been
     195                 :      * write-locked by gistNewBuffer().  If we manage to scan such a page
     196                 :      * here, we'll improperly assume it can be recycled.  Taking the lock
     197                 :      * synchronizes things enough to prevent a problem: either num_pages won't
     198                 :      * include the new page, or gistNewBuffer already has write lock on the
     199                 :      * buffer and it will be fully initialized before we can examine it.  (See
     200                 :      * also vacuumlazy.c, which has the same issue.)  Also, we need not worry
     201                 :      * if a page is added immediately after we look; the page splitting code
     202                 :      * already has write-lock on the left page before it adds a right page, so
     203                 :      * we must already have processed any tuples due to be moved into such a
     204                 :      * page.
     205                 :      *
     206                 :      * We can skip locking for new or temp relations, however, since no one
     207                 :      * else could be accessing them.
     208                 :      */
     209 CBC          42 :     needLock = !RELATION_IS_LOCAL(rel);
     210                 : 
     211              42 :     blkno = GIST_ROOT_BLKNO;
     212                 :     for (;;)
     213                 :     {
     214                 :         /* Get the current relation length */
     215              84 :         if (needLock)
     216              84 :             LockRelationForExtension(rel, ExclusiveLock);
     217              84 :         num_pages = RelationGetNumberOfBlocks(rel);
     218              84 :         if (needLock)
     219              84 :             UnlockRelationForExtension(rel, ExclusiveLock);
     220                 : 
     221                 :         /* Quit if we've scanned the whole relation */
     222              84 :         if (blkno >= num_pages)
     223              42 :             break;
     224                 :         /* Iterate over pages, then loop back to recheck length */
     225            1373 :         for (; blkno < num_pages; blkno++)
     226            1331 :             gistvacuumpage(&vstate, blkno, blkno);
     227                 :     }
     228                 : 
     229                 :     /*
     230                 :      * If we found any recyclable pages (and recorded them in the FSM), then
     231                 :      * forcibly update the upper-level FSM pages to ensure that searchers can
     232                 :      * find them.  It's possible that the pages were also found during
     233                 :      * previous scans and so this is a waste of time, but it's cheap enough
     234                 :      * relative to scanning the index that it shouldn't matter much, and
     235                 :      * making sure that free pages are available sooner not later seems
     236                 :      * worthwhile.
     237                 :      *
     238                 :      * Note that if no recyclable pages exist, we don't bother vacuuming the
     239                 :      * FSM at all.
     240                 :      */
     241              42 :     if (stats->pages_free > 0)
     242 UBC           0 :         IndexFreeSpaceMapVacuum(rel);
     243                 : 
     244                 :     /* update statistics */
     245 CBC          42 :     stats->num_pages = num_pages;
     246                 : 
     247                 :     /*
     248                 :      * If we saw any empty pages, try to unlink them from the tree so that
     249                 :      * they can be reused.
     250                 :      */
     251              42 :     gistvacuum_delete_empty_pages(info, &vstate);
     252                 : 
     253                 :     /* we don't need the internal and empty page sets anymore */
     254              42 :     MemoryContextDelete(vstate.page_set_context);
     255              42 :     vstate.page_set_context = NULL;
     256              42 :     vstate.internal_page_set = NULL;
     257              42 :     vstate.empty_leaf_set = NULL;
     258              42 : }
     259                 : 
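
gistvacuumscan() above accumulates the internal pages and the empty leaf pages it sees
in two IntegerSets from lib/integerset.h.  The sketch below is a minimal illustration
of that API, using only the functions this file already calls; intset_usage_sketch is
a made-up name.  Members must be added in ascending order, which is why
gistvacuumpage() skips intset_add_member() when it recurses back to a lower block
number.

#include "postgres.h"

#include "lib/integerset.h"

/* Minimal illustration of the IntegerSet calls used by the GiST VACUUM code. */
static void
intset_usage_sketch(void)
{
    IntegerSet *empty_leaf_set = intset_create();
    uint64      blkno;
    uint64      nseen = 0;

    /* Members must be added in ascending order. */
    intset_add_member(empty_leaf_set, 10);
    intset_add_member(empty_leaf_set, 42);

    /* Membership test, as used when checking downlinks against empty leaves. */
    Assert(intset_is_member(empty_leaf_set, 42));
    Assert(!intset_is_member(empty_leaf_set, 41));

    /* Iteration visits the members in ascending order: 10, then 42. */
    intset_begin_iterate(empty_leaf_set);
    while (intset_iterate_next(empty_leaf_set, &blkno))
        nseen++;

    Assert(nseen == intset_num_entries(empty_leaf_set));
}
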
     260                 : /*
     261                 :  * gistvacuumpage --- VACUUM one page
     262                 :  *
     263                 :  * This processes a single page for gistbulkdelete().  In some cases we
     264                 :  * must go back and re-examine previously-scanned pages; this routine
     265                 :  * recurses when necessary to handle that case.
     266                 :  *
     267                 :  * blkno is the page to process.  orig_blkno is the highest block number
     268                 :  * reached by the outer gistvacuumscan loop (the same as blkno, unless we
     269                 :  * are recursing to re-examine a previous page).
     270                 :  */
     271                 : static void
     272            1331 : gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
     273                 : {
     274            1331 :     IndexVacuumInfo *info = vstate->info;
     275            1331 :     IndexBulkDeleteCallback callback = vstate->callback;
     276            1331 :     void       *callback_state = vstate->callback_state;
     277            1331 :     Relation    rel = info->index;
     278                 :     Buffer      buffer;
     279                 :     Page        page;
     280                 :     BlockNumber recurse_to;
     281                 : 
     282            1331 : restart:
     283            1331 :     recurse_to = InvalidBlockNumber;
     284                 : 
     285                 :     /* call vacuum_delay_point while not holding any buffer lock */
     286            1331 :     vacuum_delay_point();
     287                 : 
     288            1331 :     buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
     289                 :                                 info->strategy);
     290                 : 
     291                 :     /*
     292                 :      * We are not going to stay here for a long time, aggressively grab an
     293                 :      * exclusive lock.
     294                 :      */
     295            1331 :     LockBuffer(buffer, GIST_EXCLUSIVE);
     296            1331 :     page = (Page) BufferGetPage(buffer);
     297                 : 
     298            1331 :     if (gistPageRecyclable(page))
     299                 :     {
     300                 :         /* Okay to recycle this page */
     301 UBC           0 :         RecordFreeIndexPage(rel, blkno);
     302               0 :         vstate->stats->pages_deleted++;
     303               0 :         vstate->stats->pages_free++;
     304                 :     }
     305 CBC        1331 :     else if (GistPageIsDeleted(page))
     306                 :     {
     307                 :         /* Already deleted, but can't recycle yet */
     308 UBC           0 :         vstate->stats->pages_deleted++;
     309                 :     }
     310 CBC        1331 :     else if (GistPageIsLeaf(page))
     311                 :     {
     312                 :         OffsetNumber todelete[MaxOffsetNumber];
     313            1304 :         int         ntodelete = 0;
     314                 :         int         nremain;
     315            1304 :         GISTPageOpaque opaque = GistPageGetOpaque(page);
     316            1304 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     317                 : 
     318                 :         /*
     319                 :          * Check whether we need to recurse back to earlier pages.  What we
     320                 :          * are concerned about is a page split that happened since we started
     321                 :          * the vacuum scan.  If the split moved some tuples to a lower page
     322                 :          * then we might have missed 'em.  If so, set up for tail recursion.
     323                 :          *
     324                 :          * This is similar to the checks we do during searches, when following
     325                 :          * a downlink, but we don't need to jump to higher-numbered pages,
     326                 :          * because we will process them later, anyway.
     327                 :          */
     328            2608 :         if ((GistFollowRight(page) ||
     329            1304 :              vstate->startNSN < GistPageGetNSN(page)) &&
     330 UBC           0 :             (opaque->rightlink != InvalidBlockNumber) &&
     331               0 :             (opaque->rightlink < orig_blkno))
     332                 :         {
     333               0 :             recurse_to = opaque->rightlink;
     334                 :         }
     335                 : 
     336                 :         /*
     337                 :          * Scan over all items to see which ones need to be deleted according
     338                 :          * to the callback function.
     339                 :          */
     340 CBC        1304 :         if (callback)
     341                 :         {
     342                 :             OffsetNumber off;
     343                 : 
     344             101 :             for (off = FirstOffsetNumber;
     345           10991 :                  off <= maxoff;
     346           10890 :                  off = OffsetNumberNext(off))
     347                 :             {
     348           10890 :                 ItemId      iid = PageGetItemId(page, off);
     349           10890 :                 IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     350                 : 
     351           10890 :                 if (callback(&(idxtuple->t_tid), callback_state))
     352            5842 :                     todelete[ntodelete++] = off;
     353                 :             }
     354                 :         }
     355                 : 
     356                 :         /*
     357                 :          * Apply any needed deletes.  We issue just one WAL record per page,
     358                 :          * so as to minimize WAL traffic.
     359                 :          */
     360            1304 :         if (ntodelete > 0)
     361                 :         {
     362              92 :             START_CRIT_SECTION();
     363                 : 
     364              92 :             MarkBufferDirty(buffer);
     365                 : 
     366              92 :             PageIndexMultiDelete(page, todelete, ntodelete);
     367              92 :             GistMarkTuplesDeleted(page);
     368                 : 
     369              92 :             if (RelationNeedsWAL(rel))
     370              92 :             {
     371                 :                 XLogRecPtr  recptr;
     372                 : 
     373              92 :                 recptr = gistXLogUpdate(buffer,
     374                 :                                         todelete, ntodelete,
     375                 :                                         NULL, 0, InvalidBuffer);
     376              92 :                 PageSetLSN(page, recptr);
     377                 :             }
     378                 :             else
     379 UBC           0 :                 PageSetLSN(page, gistGetFakeLSN(rel));
     380                 : 
     381 CBC          92 :             END_CRIT_SECTION();
     382                 : 
     383              92 :             vstate->stats->tuples_removed += ntodelete;
     384                 :             /* must recompute maxoff */
     385              92 :             maxoff = PageGetMaxOffsetNumber(page);
     386                 :         }
     387                 : 
     388            1304 :         nremain = maxoff - FirstOffsetNumber + 1;
     389            1304 :         if (nremain == 0)
     390                 :         {
     391                 :             /*
     392                 :              * The page is now completely empty.  Remember its block number,
     393                 :              * so that we will try to delete the page in the second stage.
     394                 :              *
     395                 :              * Skip this when recursing, because IntegerSet requires that the
     396                 :              * values are added in ascending order.  The next VACUUM will pick
     397                 :              * it up.
     398                 :              */
     399               6 :             if (blkno == orig_blkno)
     400               6 :                 intset_add_member(vstate->empty_leaf_set, blkno);
     401                 :         }
     402                 :         else
     403            1298 :             vstate->stats->num_index_tuples += nremain;
     404                 :     }
     405                 :     else
     406                 :     {
     407                 :         /*
     408                 :          * On an internal page, check for "invalid tuples", left behind by an
     409                 :          * incomplete page split on PostgreSQL 9.0 or below.  These are not
     410                 :          * created by newer PostgreSQL versions, but unfortunately, there is
     411                 :          * no version number anywhere in a GiST index, so we don't know
     412                 :          * whether this index might still contain invalid tuples or not.
     413                 :          */
     414              27 :         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
     415                 :         OffsetNumber off;
     416                 : 
     417              27 :         for (off = FirstOffsetNumber;
     418            1316 :              off <= maxoff;
     419            1289 :              off = OffsetNumberNext(off))
     420                 :         {
     421            1289 :             ItemId      iid = PageGetItemId(page, off);
     422            1289 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     423                 : 
     424            1289 :             if (GistTupleIsInvalid(idxtuple))
     425 UBC           0 :                 ereport(LOG,
     426                 :                         (errmsg("index \"%s\" contains an inner tuple marked as invalid",
     427                 :                                 RelationGetRelationName(rel)),
     428                 :                          errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
     429                 :                          errhint("Please REINDEX it.")));
     430                 :         }
     431                 : 
     432                 :         /*
     433                 :          * Remember the block number of this page, so that we can revisit it
     434                 :          * later in gistvacuum_delete_empty_pages(), when we search for
     435                 :          * parents of empty leaf pages.
     436                 :          */
     437 CBC          27 :         if (blkno == orig_blkno)
     438              27 :             intset_add_member(vstate->internal_page_set, blkno);
     439                 :     }
     440                 : 
     441            1331 :     UnlockReleaseBuffer(buffer);
     442                 : 
     443                 :     /*
     444                 :      * This is really tail recursion, but if the compiler is too stupid to
     445                 :      * optimize it as such, we'd eat an uncomfortably large amount of stack
     446                 :  * space per recursion level (due to the todelete[] array).  A failure is
     447                 :      * improbable since the number of levels isn't likely to be large ... but
     448                 :      * just in case, let's hand-optimize into a loop.
     449                 :      */
     450            1331 :     if (recurse_to != InvalidBlockNumber)
     451                 :     {
     452 UBC           0 :         blkno = recurse_to;
     453               0 :         goto restart;
     454                 :     }
     455 CBC        1331 : }
     456                 : 
     457                 : /*
     458                 :  * Scan all internal pages, and try to delete their empty child pages.
     459                 :  */
     460                 : static void
     461              42 : gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
     462                 : {
     463              42 :     Relation    rel = info->index;
     464                 :     BlockNumber empty_pages_remaining;
     465                 :     uint64      blkno;
     466                 : 
     467                 :     /*
     468                 :      * Rescan all inner pages to find those that have empty child pages.
     469                 :      */
     470              42 :     empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
     471              42 :     intset_begin_iterate(vstate->internal_page_set);
     472              48 :     while (empty_pages_remaining > 0 &&
     473               6 :            intset_iterate_next(vstate->internal_page_set, &blkno))
     474                 :     {
     475                 :         Buffer      buffer;
     476                 :         Page        page;
     477                 :         OffsetNumber off,
     478                 :                     maxoff;
     479                 :         OffsetNumber todelete[MaxOffsetNumber];
     480                 :         BlockNumber leafs_to_delete[MaxOffsetNumber];
     481                 :         int         ntodelete;
     482                 :         int         deleted;
     483                 : 
     484 UBC           0 :         buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber) blkno,
     485                 :                                     RBM_NORMAL, info->strategy);
     486                 : 
     487               0 :         LockBuffer(buffer, GIST_SHARE);
     488               0 :         page = (Page) BufferGetPage(buffer);
     489                 : 
     490               0 :         if (PageIsNew(page) || GistPageIsDeleted(page) || GistPageIsLeaf(page))
     491                 :         {
     492                 :             /*
     493                 :              * This page was an internal page earlier, but now it's something
     494                 :              * else. Shouldn't happen...
     495                 :              */
     496               0 :             Assert(false);
     497                 :             UnlockReleaseBuffer(buffer);
     498                 :             continue;
     499                 :         }
     500                 : 
     501                 :         /*
     502                 :          * Scan all the downlinks, and see if any of them point to empty leaf
     503                 :          * pages.
     504                 :          */
     505               0 :         maxoff = PageGetMaxOffsetNumber(page);
     506               0 :         ntodelete = 0;
     507               0 :         for (off = FirstOffsetNumber;
     508               0 :              off <= maxoff && ntodelete < maxoff - 1;
     509               0 :              off = OffsetNumberNext(off))
     510                 :         {
     511               0 :             ItemId      iid = PageGetItemId(page, off);
     512               0 :             IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
     513                 :             BlockNumber leafblk;
     514                 : 
     515               0 :             leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     516               0 :             if (intset_is_member(vstate->empty_leaf_set, leafblk))
     517                 :             {
     518               0 :                 leafs_to_delete[ntodelete] = leafblk;
     519               0 :                 todelete[ntodelete++] = off;
     520                 :             }
     521                 :         }
     522                 : 
     523                 :         /*
     524                 :          * In order to avoid deadlock, child page must be locked before
     525                 :          * parent, so we must release the lock on the parent, lock the child,
     526                 :  * and then re-acquire the lock on the parent.  (And we wouldn't want to
     527                 :          * do I/O, while holding a lock, anyway.)
     528                 :          *
     529                 :          * At the instant that we're not holding a lock on the parent, the
     530                 :          * downlink might get moved by a concurrent insert, so we must
     531                 :          * re-check that it still points to the same child page after we have
     532                 :          * acquired both locks.  Also, another backend might have inserted a
     533                 :          * tuple to the page, so that it is no longer empty.  gistdeletepage()
     534                 :          * re-checks all these conditions.
     535                 :          */
     536               0 :         LockBuffer(buffer, GIST_UNLOCK);
     537                 : 
     538               0 :         deleted = 0;
     539               0 :         for (int i = 0; i < ntodelete; i++)
     540                 :         {
     541                 :             Buffer      leafbuf;
     542                 : 
     543                 :             /*
     544                 :              * Don't remove the last downlink from the parent.  That would
     545                 :              * confuse the insertion code.
     546                 :              */
     547               0 :             if (PageGetMaxOffsetNumber(page) == FirstOffsetNumber)
     548               0 :                 break;
     549                 : 
     550               0 :             leafbuf = ReadBufferExtended(rel, MAIN_FORKNUM, leafs_to_delete[i],
     551                 :                                          RBM_NORMAL, info->strategy);
     552               0 :             LockBuffer(leafbuf, GIST_EXCLUSIVE);
     553               0 :             gistcheckpage(rel, leafbuf);
     554                 : 
     555               0 :             LockBuffer(buffer, GIST_EXCLUSIVE);
     556               0 :             if (gistdeletepage(info, vstate->stats,
     557               0 :                                buffer, todelete[i] - deleted,
     558                 :                                leafbuf))
     559               0 :                 deleted++;
     560               0 :             LockBuffer(buffer, GIST_UNLOCK);
     561                 : 
     562               0 :             UnlockReleaseBuffer(leafbuf);
     563                 :         }
     564                 : 
     565               0 :         ReleaseBuffer(buffer);
     566                 : 
     567                 :         /*
     568                 :          * We can stop the scan as soon as we have seen the downlinks, even if
     569                 :          * we were not able to remove them all.
     570                 :          */
     571               0 :         empty_pages_remaining -= ntodelete;
     572                 :     }
     573 CBC          42 : }
     574                 : 
     575                 : /*
     576                 :  * gistdeletepage takes a leaf page, and its parent, and tries to delete the
     577                 :  * leaf.  Both pages must be locked.
     578                 :  *
     579                 :  * Even if the page was empty when we first saw it, a concurrent inserter might
     580                 :  * have added a tuple to it since.  Similarly, the downlink might have moved.
     581                 :  * We re-check all the conditions, to make sure the page is still deletable,
     582                 :  * before modifying anything.
     583                 :  *
     584                 :  * Returns true if the page was deleted, and false if a concurrent update
     585                 :  * prevented it.
     586                 :  */
     587                 : static bool
     588 UBC           0 : gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     589                 :                Buffer parentBuffer, OffsetNumber downlink,
     590                 :                Buffer leafBuffer)
     591                 : {
     592               0 :     Page        parentPage = BufferGetPage(parentBuffer);
     593               0 :     Page        leafPage = BufferGetPage(leafBuffer);
     594                 :     ItemId      iid;
     595                 :     IndexTuple  idxtuple;
     596                 :     XLogRecPtr  recptr;
     597                 :     FullTransactionId txid;
     598                 : 
     599                 :     /*
     600                 :      * Check that the leaf is still empty and deletable.
     601                 :      */
     602               0 :     if (!GistPageIsLeaf(leafPage))
     603                 :     {
     604                 :         /* a leaf page should never become a non-leaf page */
     605               0 :         Assert(false);
     606                 :         return false;
     607                 :     }
     608                 : 
     609               0 :     if (GistFollowRight(leafPage))
     610               0 :         return false;           /* don't mess with a concurrent page split */
     611                 : 
     612               0 :     if (PageGetMaxOffsetNumber(leafPage) != InvalidOffsetNumber)
     613               0 :         return false;           /* not empty anymore */
     614                 : 
     615                 :     /*
     616                 :      * Ok, the leaf is deletable.  Is the downlink in the parent page still
     617                 :      * valid?  It might have been moved by a concurrent insert.  We could try
     618                 :      * to re-find it by scanning the page again, possibly moving right if the
     619                 :  * page was split.  But for now, let's keep it simple and just give up.  The
     620                 :      * next VACUUM will pick it up.
     621                 :      */
     622               0 :     if (PageIsNew(parentPage) || GistPageIsDeleted(parentPage) ||
     623               0 :         GistPageIsLeaf(parentPage))
     624                 :     {
     625                 :         /* shouldn't happen, internal pages are never deleted */
     626               0 :         Assert(false);
     627                 :         return false;
     628                 :     }
     629                 : 
     630               0 :     if (PageGetMaxOffsetNumber(parentPage) < downlink
     631               0 :         || PageGetMaxOffsetNumber(parentPage) <= FirstOffsetNumber)
     632               0 :         return false;
     633                 : 
     634               0 :     iid = PageGetItemId(parentPage, downlink);
     635               0 :     idxtuple = (IndexTuple) PageGetItem(parentPage, iid);
     636               0 :     if (BufferGetBlockNumber(leafBuffer) !=
     637               0 :         ItemPointerGetBlockNumber(&(idxtuple->t_tid)))
     638               0 :         return false;
     639                 : 
     640                 :     /*
     641                 :      * All good, proceed with the deletion.
     642                 :      *
     643                 :      * The page cannot be immediately recycled, because in-progress scans that
     644                 :      * saw the downlink might still visit it.  Mark the page with the current
     645                 :      * next-XID counter, so that we know when it can be recycled.  Once that
     646                 :      * XID becomes older than GlobalXmin, we know that all scans that are
     647                 :      * currently in progress must have ended.  (That's much more conservative
     648                 :      * than needed, but let's keep it safe and simple.)
     649                 :      */
     650               0 :     txid = ReadNextFullTransactionId();
     651                 : 
     652               0 :     START_CRIT_SECTION();
     653                 : 
     654                 :     /* mark the page as deleted */
     655               0 :     MarkBufferDirty(leafBuffer);
     656               0 :     GistPageSetDeleted(leafPage, txid);
     657               0 :     stats->pages_newly_deleted++;
     658               0 :     stats->pages_deleted++;
     659                 : 
     660                 :     /* remove the downlink from the parent */
     661               0 :     MarkBufferDirty(parentBuffer);
     662               0 :     PageIndexTupleDelete(parentPage, downlink);
     663                 : 
     664               0 :     if (RelationNeedsWAL(info->index))
     665               0 :         recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer, downlink);
     666                 :     else
     667               0 :         recptr = gistGetFakeLSN(info->index);
     668               0 :     PageSetLSN(parentPage, recptr);
     669               0 :     PageSetLSN(leafPage, recptr);
     670                 : 
     671               0 :     END_CRIT_SECTION();
     672                 : 
     673               0 :     return true;
     674                 : }
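
The recycling rule described in gistdeletepage() (stamp the deleted page with the next
full transaction ID, and let a later VACUUM recycle it via gistPageRecyclable() once
that XID is older than any scan still in progress) is implemented outside this file,
in gistutil.c.  The sketch below only approximates that test; it assumes the
GistPageGetDeleteXid() accessor and the GlobalVisCheckRemovableFullXid() visibility
helper, and the real function may differ in detail.

/*
 * Approximate sketch only: the kind of recyclability test gistPageRecyclable()
 * performs.  A page marked deleted may be recycled once the XID stamped by
 * GistPageSetDeleted() is old enough that no in-progress scan can still follow
 * a downlink to the page.  Not the code from gistutil.c.  In addition to the
 * headers included at the top of this file, the header declaring
 * GlobalVisCheckRemovableFullXid() would be needed.
 */
static bool
page_recyclable_sketch(Page page)
{
    if (PageIsNew(page))
        return true;            /* never-initialized page, safe to reuse */

    if (GistPageIsDeleted(page))
    {
        /* XID recorded at deletion time (see gistdeletepage above) */
        FullTransactionId deletexid = GistPageGetDeleteXid(page);

        /* recyclable once that XID is older than every running scan */
        return GlobalVisCheckRemovableFullXid(NULL, deletexid);
    }

    return false;
}
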
        

Generated by: LCOV version v1.16-55-g56c0a2a