LCOV - differential code coverage report
Current view:   top level - src/backend/access/hash - hash.c (source / functions)
Current:        Differential Code Coverage HEAD vs 15
Current Date:   2023-04-08 15:15:32
Baseline:       15
Baseline Date:  2023-04-08 15:09:40

             Coverage    Total   Hit   LBC  UIC  UBC  GBC   GIC  GNC   CBC  EUB   ECB  DCB
Lines:         92.6 %      311   288     4    9   10    1   167    3   117   12   166    1
Functions:    100.0 %       13    13     -    -    -    -    12    -     1    -    12    -

Legend: Lines: hit / not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * hash.c
       4                 :  *    Implementation of Margo Seltzer's Hashing package for postgres.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/access/hash/hash.c
      12                 :  *
      13                 :  * NOTES
      14                 :  *    This file contains only the public interface routines.
      15                 :  *
      16                 :  *-------------------------------------------------------------------------
      17                 :  */
      18                 : 
      19                 : #include "postgres.h"
      20                 : 
      21                 : #include "access/hash.h"
      22                 : #include "access/hash_xlog.h"
      23                 : #include "access/relscan.h"
      24                 : #include "access/tableam.h"
      25                 : #include "access/xloginsert.h"
      26                 : #include "catalog/index.h"
      27                 : #include "commands/progress.h"
      28                 : #include "commands/vacuum.h"
      29                 : #include "miscadmin.h"
      30                 : #include "optimizer/plancat.h"
      31                 : #include "pgstat.h"
      32                 : #include "utils/builtins.h"
      33                 : #include "utils/index_selfuncs.h"
      34                 : #include "utils/rel.h"
      35                 : 
      36                 : /* Working state for hashbuild and its callback */
      37                 : typedef struct
      38                 : {
      39                 :     HSpool     *spool;          /* NULL if not using spooling */
      40                 :     double      indtuples;      /* # tuples accepted into index */
      41                 :     Relation    heapRel;        /* heap relation descriptor */
      42                 : } HashBuildState;
      43                 : 
      44                 : static void hashbuildCallback(Relation index,
      45                 :                               ItemPointer tid,
      46                 :                               Datum *values,
      47                 :                               bool *isnull,
      48                 :                               bool tupleIsAlive,
      49                 :                               void *state);
      50                 : 
      51                 : 
      52                 : /*
      53                 :  * Hash handler function: return IndexAmRoutine with access method parameters
      54                 :  * and callbacks.
      55                 :  */
      56                 : Datum
      57 CBC         990 : hashhandler(PG_FUNCTION_ARGS)
      58                 : {
      59             990 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
      60                 : 
      61             990 :     amroutine->amstrategies = HTMaxStrategyNumber;
      62             990 :     amroutine->amsupport = HASHNProcs;
      63             990 :     amroutine->amoptsprocnum = HASHOPTIONS_PROC;
      64             990 :     amroutine->amcanorder = false;
      65             990 :     amroutine->amcanorderbyop = false;
      66             990 :     amroutine->amcanbackward = true;
      67             990 :     amroutine->amcanunique = false;
      68             990 :     amroutine->amcanmulticol = false;
      69             990 :     amroutine->amoptionalkey = false;
      70             990 :     amroutine->amsearcharray = false;
      71             990 :     amroutine->amsearchnulls = false;
      72             990 :     amroutine->amstorage = false;
      73             990 :     amroutine->amclusterable = false;
      74             990 :     amroutine->ampredlocks = true;
      75             990 :     amroutine->amcanparallel = false;
      76             990 :     amroutine->amcaninclude = false;
      77             990 :     amroutine->amusemaintenanceworkmem = false;
      78 GNC         990 :     amroutine->amsummarizing = false;
      79 CBC         990 :     amroutine->amparallelvacuumoptions =
      80 ECB             :         VACUUM_OPTION_PARALLEL_BULKDEL;
      81 GIC         990 :     amroutine->amkeytype = INT4OID;
      82 ECB             : 
      83 GIC         990 :     amroutine->ambuild = hashbuild;
      84 CBC         990 :     amroutine->ambuildempty = hashbuildempty;
      85             990 :     amroutine->aminsert = hashinsert;
      86             990 :     amroutine->ambulkdelete = hashbulkdelete;
      87             990 :     amroutine->amvacuumcleanup = hashvacuumcleanup;
      88             990 :     amroutine->amcanreturn = NULL;
      89             990 :     amroutine->amcostestimate = hashcostestimate;
      90             990 :     amroutine->amoptions = hashoptions;
      91             990 :     amroutine->amproperty = NULL;
      92             990 :     amroutine->ambuildphasename = NULL;
      93             990 :     amroutine->amvalidate = hashvalidate;
      94             990 :     amroutine->amadjustmembers = hashadjustmembers;
      95             990 :     amroutine->ambeginscan = hashbeginscan;
      96             990 :     amroutine->amrescan = hashrescan;
      97             990 :     amroutine->amgettuple = hashgettuple;
      98             990 :     amroutine->amgetbitmap = hashgetbitmap;
      99             990 :     amroutine->amendscan = hashendscan;
     100             990 :     amroutine->ammarkpos = NULL;
     101             990 :     amroutine->amrestrpos = NULL;
     102             990 :     amroutine->amestimateparallelscan = NULL;
     103             990 :     amroutine->aminitparallelscan = NULL;
     104             990 :     amroutine->amparallelrescan = NULL;
     105 ECB             : 
     106 GIC         990 :     PG_RETURN_POINTER(amroutine);
     107 ECB             : }
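                          : 
                          : /*
                          :  * [Editorial example -- not part of hash.c]  Core code does not call
                          :  * hashhandler() directly; it invokes the handler function recorded in
                          :  * pg_am through GetIndexAmRoutine() (access/index/amapi.c).  A minimal
                          :  * sketch, assuming a valid amhandler OID -- for hash this would return
                          :  * false, per the flags set above:
                          :  *
                          :  *     #include "access/amapi.h"
                          :  *
                          :  *     static bool
                          :  *     index_am_can_order(Oid amhandler)
                          :  *     {
                          :  *         IndexAmRoutine *r = GetIndexAmRoutine(amhandler);
                          :  *
                          :  *         return r->amcanorder;
                          :  *     }
                          :  */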
     108                 : 
     109                 : /*
     110                 :  *  hashbuild() -- build a new hash index.
     111                 :  */
     112                 : IndexBuildResult *
     113 GIC         146 : hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     114 ECB             : {
     115                 :     IndexBuildResult *result;
     116                 :     BlockNumber relpages;
     117                 :     double      reltuples;
     118                 :     double      allvisfrac;
     119                 :     uint32      num_buckets;
     120                 :     long        sort_threshold;
     121                 :     HashBuildState buildstate;
     122                 : 
     123                 :     /*
      124                 :      * We expect to be called exactly once for any index relation.  If
      125                 :      * that's not the case, we have big trouble.
     126                 :      */
     127 GIC         146 :     if (RelationGetNumberOfBlocks(index) != 0)
     128 LBC           0 :         elog(ERROR, "index \"%s\" already contains data",
     129 EUB             :              RelationGetRelationName(index));
     130                 : 
     131                 :     /* Estimate the number of rows currently present in the table */
     132 GIC         146 :     estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
     133 ECB             : 
     134                 :     /* Initialize the hash index metadata page and initial buckets */
     135 GIC         146 :     num_buckets = _hash_init(index, reltuples, MAIN_FORKNUM);
     136 ECB             : 
     137                 :     /*
     138                 :      * If we just insert the tuples into the index in scan order, then
     139                 :      * (assuming their hash codes are pretty random) there will be no locality
     140                 :      * of access to the index, and if the index is bigger than available RAM
     141                 :      * then we'll thrash horribly.  To prevent that scenario, we can sort the
     142                 :      * tuples by (expected) bucket number.  However, such a sort is useless
     143                 :      * overhead when the index does fit in RAM.  We choose to sort if the
     144                 :      * initial index size exceeds maintenance_work_mem, or the number of
     145                 :      * buffers usable for the index, whichever is less.  (Limiting by the
     146                 :      * number of buffers should reduce thrashing between PG buffers and kernel
     147                 :      * buffers, which seems useful even if no physical I/O results.  Limiting
     148                 :      * by maintenance_work_mem is useful to allow easy testing of the sort
     149                 :      * code path, and may be useful to DBAs as an additional control knob.)
     150                 :      *
     151                 :      * NOTE: this test will need adjustment if a bucket is ever different from
     152                 :      * one page.  Also, "initial index size" accounting does not include the
     153                 :      * metapage, nor the first bitmap page.
     154                 :      */
     155 GIC         146 :     sort_threshold = (maintenance_work_mem * 1024L) / BLCKSZ;
     156 CBC         146 :     if (index->rd_rel->relpersistence != RELPERSISTENCE_TEMP)
     157             143 :         sort_threshold = Min(sort_threshold, NBuffers);
     158 ECB             :     else
     159 GIC           3 :         sort_threshold = Min(sort_threshold, NLocBuffer);
     160 ECB             : 
     161 GIC         146 :     if (num_buckets >= (uint32) sort_threshold)
     162 CBC           4 :         buildstate.spool = _h_spoolinit(heap, index, num_buckets);
     163 ECB             :     else
     164 GIC         142 :         buildstate.spool = NULL;
     165 ECB             : 
     166                 :     /* prepare to build the index */
     167 GIC         146 :     buildstate.indtuples = 0;
     168 CBC         146 :     buildstate.heapRel = heap;
     169 ECB             : 
     170                 :     /* do the heap scan */
     171 GIC         146 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     172 ECB             :                                        hashbuildCallback,
     173                 :                                        (void *) &buildstate, NULL);
     174 GIC         146 :     pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_TOTAL,
     175 CBC         146 :                                  buildstate.indtuples);
     176 ECB             : 
     177 GIC         146 :     if (buildstate.spool)
     178 ECB             :     {
     179                 :         /* sort the tuples and insert them into the index */
     180 GIC           4 :         _h_indexbuild(buildstate.spool, buildstate.heapRel);
     181 CBC           4 :         _h_spooldestroy(buildstate.spool);
     182 ECB             :     }
     183                 : 
     184                 :     /*
     185                 :      * Return statistics
     186                 :      */
     187 GIC         146 :     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
     188 ECB             : 
     189 GIC         146 :     result->heap_tuples = reltuples;
     190 CBC         146 :     result->index_tuples = buildstate.indtuples;
     191 ECB             : 
     192 GIC         146 :     return result;
     193 ECB             : }
     194                 : 
     195                 : /*
     196                 :  *  hashbuildempty() -- build an empty hash index in the initialization fork
     197                 :  */
     198                 : void
     199 GIC           3 : hashbuildempty(Relation index)
     200 ECB             : {
     201 GIC           3 :     _hash_init(index, 0, INIT_FORKNUM);
     202 CBC           3 : }
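                          : 
                          : /*
                          :  * [Editorial note]  The init fork written here is what backs unlogged
                          :  * indexes: after a crash, recovery copies the init fork over the main
                          :  * fork, so an unlogged hash index comes back empty but structurally
                          :  * valid.
                          :  */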
     203 ECB             : 
     204                 : /*
     205                 :  * Per-tuple callback for table_index_build_scan
     206                 :  */
     207                 : static void
     208 GIC      247266 : hashbuildCallback(Relation index,
     209 ECB             :                   ItemPointer tid,
     210                 :                   Datum *values,
     211                 :                   bool *isnull,
     212                 :                   bool tupleIsAlive,
     213                 :                   void *state)
     214                 : {
     215 GIC      247266 :     HashBuildState *buildstate = (HashBuildState *) state;
     216 ECB             :     Datum       index_values[1];
     217                 :     bool        index_isnull[1];
     218                 :     IndexTuple  itup;
     219                 : 
     220                 :     /* convert data to a hash key; on failure, do not insert anything */
     221 GIC      247266 :     if (!_hash_convert_tuple(index,
     222 ECB             :                              values, isnull,
     223                 :                              index_values, index_isnull))
     224 UIC           0 :         return;
     225 EUB             : 
     226                 :     /* Either spool the tuple for sorting, or just put it into the index */
     227 GIC      247266 :     if (buildstate->spool)
     228 CBC       60500 :         _h_spool(buildstate->spool, tid, index_values, index_isnull);
     229 ECB             :     else
     230                 :     {
     231                 :         /* form an index tuple and point it at the heap tuple */
     232 GIC      186766 :         itup = index_form_tuple(RelationGetDescr(index),
     233 ECB             :                                 index_values, index_isnull);
     234 GIC      186766 :         itup->t_tid = *tid;
     235 GNC      186766 :         _hash_doinsert(index, itup, buildstate->heapRel, false);
     236 CBC      186766 :         pfree(itup);
     237 ECB             :     }
     238                 : 
     239 GIC      247266 :     buildstate->indtuples += 1;
     240 ECB             : }
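                          : 
                          : /*
                          :  * [Editorial note]  _hash_convert_tuple() (in hashutil.c) replaces the
                          :  * user's key datum with its 32-bit hash code, which is why hashhandler
                          :  * reports amkeytype = INT4OID: the index stores only hash codes, never
                          :  * the original key values.  It returns false for a NULL key, since
                          :  * hash indexes do not store NULLs (amsearchnulls = false above).
                          :  */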
     241                 : 
     242                 : /*
     243                 :  *  hashinsert() -- insert an index tuple into a hash table.
     244                 :  *
     245                 :  *  Hash on the heap tuple's key, form an index tuple with hash code.
     246                 :  *  Find the appropriate location for the new tuple, and put it there.
     247                 :  */
     248                 : bool
     249 GIC       98244 : hashinsert(Relation rel, Datum *values, bool *isnull,
     250 ECB             :            ItemPointer ht_ctid, Relation heapRel,
     251                 :            IndexUniqueCheck checkUnique,
     252                 :            bool indexUnchanged,
     253                 :            IndexInfo *indexInfo)
     254                 : {
     255                 :     Datum       index_values[1];
     256                 :     bool        index_isnull[1];
     257                 :     IndexTuple  itup;
     258                 : 
     259                 :     /* convert data to a hash key; on failure, do not insert anything */
     260 GIC       98244 :     if (!_hash_convert_tuple(rel,
     261 ECB             :                              values, isnull,
     262                 :                              index_values, index_isnull))
     263 UIC           0 :         return false;
     264 EUB             : 
     265                 :     /* form an index tuple and point it at the heap tuple */
     266 GIC       98244 :     itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
     267 CBC       98244 :     itup->t_tid = *ht_ctid;
     268 ECB             : 
     269 GNC       98244 :     _hash_doinsert(rel, itup, heapRel, false);
     270 ECB             : 
     271 GIC       98238 :     pfree(itup);
     272 ECB             : 
     273 GIC       98238 :     return false;
     274 ECB             : }
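                          : 
                          : /*
                          :  * [Editorial note]  The unconditional "return false" above is safe
                          :  * because aminsert's boolean result matters only for deferred
                          :  * uniqueness checks, and hash indexes never enforce uniqueness
                          :  * (amcanunique = false in hashhandler).
                          :  */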
     275                 : 
     276                 : 
     277                 : /*
     278                 :  *  hashgettuple() -- Get the next tuple in the scan.
     279                 :  */
     280                 : bool
     281 GIC       50777 : hashgettuple(IndexScanDesc scan, ScanDirection dir)
     282 ECB             : {
     283 GIC       50777 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     284 ECB             :     bool        res;
     285                 : 
     286                 :     /* Hash indexes are always lossy since we store only the hash code */
     287 GIC       50777 :     scan->xs_recheck = true;
     288 ECB             : 
     289                 :     /*
     290                 :      * If we've already initialized this scan, we can just advance it in the
     291                 :      * appropriate direction.  If we haven't done so yet, we call a routine to
     292                 :      * get the first item in the scan.
     293                 :      */
     294 GIC       50777 :     if (!HashScanPosIsValid(so->currPos))
     295 CBC         263 :         res = _hash_first(scan, dir);
     296 ECB             :     else
     297                 :     {
     298                 :         /*
     299                 :          * Check to see if we should kill the previously-fetched tuple.
     300                 :          */
     301 GIC       50514 :         if (scan->kill_prior_tuple)
     302 ECB             :         {
     303                 :             /*
     304                 :              * Yes, so remember it for later. (We'll deal with all such tuples
     305                 :              * at once right after leaving the index page or at end of scan.)
      306                 :              * If the caller reverses the indexscan direction, it is quite
      307                 :              * possible that the same item might get entered multiple times.
      308                 :              * But we don't detect that; instead, we just forget any excess
     309                 :              * entries.
     310                 :              */
     311 UIC           0 :             if (so->killedItems == NULL)
     312 UBC           0 :                 so->killedItems = (int *)
     313               0 :                     palloc(MaxIndexTuplesPerPage * sizeof(int));
     314 EUB             : 
     315 UIC           0 :             if (so->numKilled < MaxIndexTuplesPerPage)
     316 UBC           0 :                 so->killedItems[so->numKilled++] = so->currPos.itemIndex;
     317 EUB             :         }
     318                 : 
     319                 :         /*
     320                 :          * Now continue the scan.
     321                 :          */
     322 GIC       50514 :         res = _hash_next(scan, dir);
     323 ECB             :     }
     324                 : 
     325 GIC       50776 :     return res;
     326 ECB             : }
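                          : 
                          : /*
                          :  * [Editorial note]  Because scan->xs_recheck is always set above, the
                          :  * executor re-evaluates the original qual against each returned heap
                          :  * tuple; two keys that collide on the same 32-bit hash code would
                          :  * otherwise yield false matches, since only hash codes are stored.
                          :  */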
     327                 : 
     328                 : 
     329                 : /*
     330                 :  *  hashgetbitmap() -- get all tuples at once
     331                 :  */
     332                 : int64
     333 GIC          30 : hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
     334 ECB             : {
     335 GIC          30 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     336 ECB             :     bool        res;
     337 GIC          30 :     int64       ntids = 0;
     338 ECB             :     HashScanPosItem *currItem;
     339                 : 
     340 GIC          30 :     res = _hash_first(scan, ForwardScanDirection);
     341 ECB             : 
     342 GIC          93 :     while (res)
     343 ECB             :     {
     344 GIC          63 :         currItem = &so->currPos.items[so->currPos.itemIndex];
     345 ECB             : 
     346                 :         /*
      347                 :          * _hash_first and _hash_next eliminate dead index entries
     348                 :          * whenever scan->ignore_killed_tuples is true.  Therefore, there's
     349                 :          * nothing to do here except add the results to the TIDBitmap.
     350                 :          */
     351 GIC          63 :         tbm_add_tuples(tbm, &(currItem->heapTid), 1, true);
     352 CBC          63 :         ntids++;
     353 ECB             : 
     354 GIC          63 :         res = _hash_next(scan, ForwardScanDirection);
     355 ECB             :     }
     356                 : 
     357 GIC          30 :     return ntids;
     358 ECB             : }
     359                 : 
     360                 : 
     361                 : /*
     362                 :  *  hashbeginscan() -- start a scan on a hash index
     363                 :  */
     364                 : IndexScanDesc
     365 GIC         185 : hashbeginscan(Relation rel, int nkeys, int norderbys)
     366 ECB             : {
     367                 :     IndexScanDesc scan;
     368                 :     HashScanOpaque so;
     369                 : 
     370                 :     /* no order by operators allowed */
     371 GIC         185 :     Assert(norderbys == 0);
     372 ECB             : 
     373 GIC         185 :     scan = RelationGetIndexScan(rel, nkeys, norderbys);
     374 ECB             : 
     375 GIC         185 :     so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
     376 CBC         185 :     HashScanPosInvalidate(so->currPos);
     377             185 :     so->hashso_bucket_buf = InvalidBuffer;
     378             185 :     so->hashso_split_bucket_buf = InvalidBuffer;
     379 ECB             : 
     380 GIC         185 :     so->hashso_buc_populated = false;
     381 CBC         185 :     so->hashso_buc_split = false;
     382 ECB             : 
     383 GIC         185 :     so->killedItems = NULL;
     384 CBC         185 :     so->numKilled = 0;
     385 ECB             : 
     386 GIC         185 :     scan->opaque = so;
     387 ECB             : 
     388 GIC         185 :     return scan;
     389 ECB             : }
     390                 : 
     391                 : /*
     392                 :  *  hashrescan() -- rescan an index relation
     393                 :  */
     394                 : void
     395 GIC         290 : hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
     396 ECB             :            ScanKey orderbys, int norderbys)
     397                 : {
     398 GIC         290 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     399 CBC         290 :     Relation    rel = scan->indexRelation;
     400 ECB             : 
     401 GIC         290 :     if (HashScanPosIsValid(so->currPos))
     402 ECB             :     {
     403                 :         /* Before leaving current page, deal with any killed items */
     404 GIC          42 :         if (so->numKilled > 0)
     405 LBC           0 :             _hash_kill_items(scan);
     406 EUB             :     }
     407                 : 
     408 GIC         290 :     _hash_dropscanbuf(rel, so);
     409 ECB             : 
     410                 :     /* set position invalid (this will cause _hash_first call) */
     411 GIC         290 :     HashScanPosInvalidate(so->currPos);
     412 ECB             : 
     413                 :     /* Update scan key, if a new one is given */
     414 GIC         290 :     if (scankey && scan->numberOfKeys > 0)
     415 ECB             :     {
     416 GIC         290 :         memmove(scan->keyData,
     417 ECB             :                 scankey,
     418 GIC         290 :                 scan->numberOfKeys * sizeof(ScanKeyData));
     419 ECB             :     }
     420                 : 
     421 GIC         290 :     so->hashso_buc_populated = false;
     422 CBC         290 :     so->hashso_buc_split = false;
     423             290 : }
     424 ECB             : 
     425                 : /*
     426                 :  *  hashendscan() -- close down a scan
     427                 :  */
     428                 : void
     429 GIC         184 : hashendscan(IndexScanDesc scan)
     430 ECB             : {
     431 GIC         184 :     HashScanOpaque so = (HashScanOpaque) scan->opaque;
     432 CBC         184 :     Relation    rel = scan->indexRelation;
     433 ECB             : 
     434 GIC         184 :     if (HashScanPosIsValid(so->currPos))
     435 ECB             :     {
     436                 :         /* Before leaving current page, deal with any killed items */
     437 GIC          30 :         if (so->numKilled > 0)
     438 LBC           0 :             _hash_kill_items(scan);
     439 EUB             :     }
     440                 : 
     441 GIC         184 :     _hash_dropscanbuf(rel, so);
     442 ECB             : 
     443 GIC         184 :     if (so->killedItems != NULL)
     444 LBC           0 :         pfree(so->killedItems);
     445 GBC         184 :     pfree(so);
     446 CBC         184 :     scan->opaque = NULL;
     447             184 : }
     448 ECB             : 
     449                 : /*
     450                 :  * Bulk deletion of all index entries pointing to a set of heap tuples.
     451                 :  * The set of target tuples is specified via a callback routine that tells
     452                 :  * whether any given heap tuple (identified by ItemPointer) is being deleted.
     453                 :  *
      454                 :  * This function also deletes the tuples that were moved to another bucket
      455                 :  * by a split.
     456                 :  *
     457                 :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     458                 :  */
     459                 : IndexBulkDeleteResult *
     460 GIC          12 : hashbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     461 ECB             :                IndexBulkDeleteCallback callback, void *callback_state)
     462                 : {
     463 GIC          12 :     Relation    rel = info->index;
     464 ECB             :     double      tuples_removed;
     465                 :     double      num_index_tuples;
     466                 :     double      orig_ntuples;
     467                 :     Bucket      orig_maxbucket;
     468                 :     Bucket      cur_maxbucket;
     469                 :     Bucket      cur_bucket;
     470 GIC          12 :     Buffer      metabuf = InvalidBuffer;
     471 ECB             :     HashMetaPage metap;
     472                 :     HashMetaPage cachedmetap;
     473                 : 
     474 GIC          12 :     tuples_removed = 0;
     475 CBC          12 :     num_index_tuples = 0;
     476 ECB             : 
     477                 :     /*
     478                 :      * We need a copy of the metapage so that we can use its hashm_spares[]
     479                 :      * values to compute bucket page addresses, but a cached copy should be
     480                 :      * good enough.  (If not, we'll detect that further down and refresh the
     481                 :      * cache as necessary.)
     482                 :      */
     483 GIC          12 :     cachedmetap = _hash_getcachedmetap(rel, &metabuf, false);
     484 CBC          12 :     Assert(cachedmetap != NULL);
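                          : 
                          : /*
                          :  * [Editorial note]  With force = false, _hash_getcachedmetap() returns
                          :  * the copy cached in rel->rd_amcache when one exists, avoiding a
                          :  * metapage read; the force = true calls further down reread the
                          :  * metapage when a concurrent split may have made the cache stale.
                          :  */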
     485 ECB             : 
     486 GIC          12 :     orig_maxbucket = cachedmetap->hashm_maxbucket;
     487 CBC          12 :     orig_ntuples = cachedmetap->hashm_ntuples;
     488 ECB             : 
     489                 :     /* Scan the buckets that we know exist */
     490 GIC          12 :     cur_bucket = 0;
     491 CBC          12 :     cur_maxbucket = orig_maxbucket;
     492 ECB             : 
     493 GIC          12 : loop_top:
     494 CBC         318 :     while (cur_bucket <= cur_maxbucket)
     495 ECB             :     {
     496                 :         BlockNumber bucket_blkno;
     497                 :         BlockNumber blkno;
     498                 :         Buffer      bucket_buf;
     499                 :         Buffer      buf;
     500                 :         HashPageOpaque bucket_opaque;
     501                 :         Page        page;
     502 GIC         306 :         bool        split_cleanup = false;
     503 ECB             : 
     504                 :         /* Get address of bucket's start page */
     505 GIC         306 :         bucket_blkno = BUCKET_TO_BLKNO(cachedmetap, cur_bucket);
     506 ECB             : 
     507 GIC         306 :         blkno = bucket_blkno;
     508 ECB             : 
     509                 :         /*
      510                 :          * We need to acquire a cleanup lock on the primary bucket page to
      511                 :          * wait out concurrent scans before deleting the dead tuples.
     512                 :          */
     513 GIC         306 :         buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, info->strategy);
     514 CBC         306 :         LockBufferForCleanup(buf);
     515             306 :         _hash_checkpage(rel, buf, LH_BUCKET_PAGE);
     516 ECB             : 
     517 GIC         306 :         page = BufferGetPage(buf);
     518 CBC         306 :         bucket_opaque = HashPageGetOpaque(page);
     519 ECB             : 
     520                 :         /*
      521                 :          * If the bucket contains tuples that were moved by a split, then we
      522                 :          * need to delete them.  We can't do so while the split operation on
      523                 :          * the bucket is unfinished, since concurrent scans still need them.
     524                 :          */
     525 GIC         306 :         if (!H_BUCKET_BEING_SPLIT(bucket_opaque) &&
     526 CBC         306 :             H_NEEDS_SPLIT_CLEANUP(bucket_opaque))
     527 ECB             :         {
     528 UIC           0 :             split_cleanup = true;
     529 EUB             : 
     530                 :             /*
     531                 :              * This bucket might have been split since we last held a lock on
     532                 :              * the metapage.  If so, hashm_maxbucket, hashm_highmask and
     533                 :              * hashm_lowmask might be old enough to cause us to fail to remove
     534                 :              * tuples left behind by the most recent split.  To prevent that,
     535                 :              * now that the primary page of the target bucket has been locked
     536                 :              * (and thus can't be further split), check whether we need to
     537                 :              * update our cached metapage data.
     538                 :              */
     539 UIC           0 :             Assert(bucket_opaque->hasho_prevblkno != InvalidBlockNumber);
     540 UBC           0 :             if (bucket_opaque->hasho_prevblkno > cachedmetap->hashm_maxbucket)
     541 EUB             :             {
     542 UIC           0 :                 cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     543 UBC           0 :                 Assert(cachedmetap != NULL);
     544 EUB             :             }
     545                 :         }
     546                 : 
     547 GIC         306 :         bucket_buf = buf;
     548 ECB             : 
     549 GIC         306 :         hashbucketcleanup(rel, cur_bucket, bucket_buf, blkno, info->strategy,
     550 ECB             :                           cachedmetap->hashm_maxbucket,
     551                 :                           cachedmetap->hashm_highmask,
     552                 :                           cachedmetap->hashm_lowmask, &tuples_removed,
     553                 :                           &num_index_tuples, split_cleanup,
     554                 :                           callback, callback_state);
     555                 : 
     556 GIC         306 :         _hash_dropbuf(rel, bucket_buf);
     557 ECB             : 
     558                 :         /* Advance to next bucket */
     559 GIC         306 :         cur_bucket++;
     560 ECB             :     }
     561                 : 
     562 GIC          12 :     if (BufferIsInvalid(metabuf))
     563 CBC           6 :         metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
     564 ECB             : 
     565                 :     /* Write-lock metapage and check for split since we started */
     566 GIC          12 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
     567 CBC          12 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
     568 ECB             : 
     569 GIC          12 :     if (cur_maxbucket != metap->hashm_maxbucket)
     570 ECB             :     {
     571                 :         /* There's been a split, so process the additional bucket(s) */
     572 UIC           0 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
     573 UBC           0 :         cachedmetap = _hash_getcachedmetap(rel, &metabuf, true);
     574               0 :         Assert(cachedmetap != NULL);
     575               0 :         cur_maxbucket = cachedmetap->hashm_maxbucket;
     576               0 :         goto loop_top;
     577 EUB             :     }
     578                 : 
     579                 :     /* Okay, we're really done.  Update tuple count in metapage. */
     580 GIC          12 :     START_CRIT_SECTION();
     581 ECB             : 
     582 GIC          12 :     if (orig_maxbucket == metap->hashm_maxbucket &&
     583 CBC          12 :         orig_ntuples == metap->hashm_ntuples)
     584 ECB             :     {
     585                 :         /*
     586                 :          * No one has split or inserted anything since start of scan, so
     587                 :          * believe our count as gospel.
     588                 :          */
     589 GIC           6 :         metap->hashm_ntuples = num_index_tuples;
     590 ECB             :     }
     591                 :     else
     592                 :     {
     593                 :         /*
     594                 :          * Otherwise, our count is untrustworthy since we may have
     595                 :          * double-scanned tuples in split buckets.  Proceed by dead-reckoning.
     596                 :          * (Note: we still return estimated_count = false, because using this
     597                 :          * count is better than not updating reltuples at all.)
     598                 :          */
     599 GIC           6 :         if (metap->hashm_ntuples > tuples_removed)
     600 CBC           5 :             metap->hashm_ntuples -= tuples_removed;
     601 ECB             :         else
     602 GIC           1 :             metap->hashm_ntuples = 0;
     603 CBC           6 :         num_index_tuples = metap->hashm_ntuples;
     604 ECB             :     }
     605                 : 
     606 GIC          12 :     MarkBufferDirty(metabuf);
     607 ECB             : 
     608                 :     /* XLOG stuff */
     609 GIC          12 :     if (RelationNeedsWAL(rel))
     610 ECB             :     {
     611                 :         xl_hash_update_meta_page xlrec;
     612                 :         XLogRecPtr  recptr;
     613                 : 
     614 GIC          12 :         xlrec.ntuples = metap->hashm_ntuples;
     615 ECB             : 
     616 GIC          12 :         XLogBeginInsert();
     617 CBC          12 :         XLogRegisterData((char *) &xlrec, SizeOfHashUpdateMetaPage);
     618 ECB             : 
     619 GIC          12 :         XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD);
     620 ECB             : 
     621 GIC          12 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE);
     622 CBC          12 :         PageSetLSN(BufferGetPage(metabuf), recptr);
     623 ECB             :     }
     624                 : 
     625 GIC          12 :     END_CRIT_SECTION();
     626 ECB             : 
     627 GIC          12 :     _hash_relbuf(rel, metabuf);
     628 ECB             : 
     629                 :     /* return statistics */
     630 GIC          12 :     if (stats == NULL)
     631 CBC          12 :         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
     632              12 :     stats->estimated_count = false;
     633              12 :     stats->num_index_tuples = num_index_tuples;
     634              12 :     stats->tuples_removed += tuples_removed;
     635 ECB             :     /* hashvacuumcleanup will fill in num_pages */
     636                 : 
     637 GIC          12 :     return stats;
     638 ECB             : }
     639                 : 
     640                 : /*
     641                 :  * Post-VACUUM cleanup.
     642                 :  *
     643                 :  * Result: a palloc'd struct containing statistical info for VACUUM displays.
     644                 :  */
     645                 : IndexBulkDeleteResult *
     646 GIC          23 : hashvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
     647 ECB             : {
     648 GIC          23 :     Relation    rel = info->index;
     649 ECB             :     BlockNumber num_pages;
     650                 : 
     651                 :     /* If hashbulkdelete wasn't called, return NULL signifying no change */
     652                 :     /* Note: this covers the analyze_only case too */
     653 GIC          23 :     if (stats == NULL)
     654 CBC          11 :         return NULL;
     655 ECB             : 
     656                 :     /* update statistics */
     657 GIC          12 :     num_pages = RelationGetNumberOfBlocks(rel);
     658 CBC          12 :     stats->num_pages = num_pages;
     659 ECB             : 
     660 GIC          12 :     return stats;
     661 ECB             : }
     662                 : 
     663                 : /*
     664                 :  * Helper function to perform deletion of index entries from a bucket.
     665                 :  *
     666                 :  * This function expects that the caller has acquired a cleanup lock on the
     667                 :  * primary bucket page, and will return with a write lock again held on the
     668                 :  * primary bucket page.  The lock won't necessarily be held continuously,
     669                 :  * though, because we'll release it when visiting overflow pages.
     670                 :  *
     671                 :  * There can't be any concurrent scans in progress when we first enter this
     672                 :  * function because of the cleanup lock we hold on the primary bucket page,
     673                 :  * but as soon as we release that lock, there might be.  If those scans got
     674                 :  * ahead of our cleanup scan, they might see a tuple before we kill it and
     675                 :  * wake up only after VACUUM has completed and the TID has been recycled for
     676                 :  * an unrelated tuple.  To avoid that calamity, we prevent scans from passing
     677                 :  * our cleanup scan by locking the next page in the bucket chain before
     678                 :  * releasing the lock on the previous page.  (This type of lock chaining is not
     679                 :  * ideal, so we might want to look for a better solution at some point.)
     680                 :  *
     681                 :  * We need to retain a pin on the primary bucket to ensure that no concurrent
     682                 :  * split can start.
     683                 :  */
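                          : /*
                          :  * [Editorial sketch]  The chaining order used in the page loop below,
                          :  * in outline:
                          :  *
                          :  *     lock page N            (cleanup lock on the primary page,
                          :  *                             plain write lock on overflow pages)
                          :  *     delete dead tuples on page N
                          :  *     lock page N+1
                          :  *     unlock page N          (the pin on the primary page is kept)
                          :  *
                          :  * so a concurrent scan can never overtake the cleanup scan within a
                          :  * bucket chain.
                          :  */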
     684                 : void
     685 GIC         972 : hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
     686 ECB             :                   BlockNumber bucket_blkno, BufferAccessStrategy bstrategy,
     687                 :                   uint32 maxbucket, uint32 highmask, uint32 lowmask,
     688                 :                   double *tuples_removed, double *num_index_tuples,
     689                 :                   bool split_cleanup,
     690                 :                   IndexBulkDeleteCallback callback, void *callback_state)
     691                 : {
     692                 :     BlockNumber blkno;
     693                 :     Buffer      buf;
     694 GIC         972 :     Bucket      new_bucket PG_USED_FOR_ASSERTS_ONLY = InvalidBucket;
     695 CBC         972 :     bool        bucket_dirty = false;
     696 ECB             : 
     697 GIC         972 :     blkno = bucket_blkno;
     698 CBC         972 :     buf = bucket_buf;
     699 ECB             : 
     700 GIC         972 :     if (split_cleanup)
     701 CBC         666 :         new_bucket = _hash_get_newbucket_from_oldbucket(rel, cur_bucket,
     702 ECB             :                                                         lowmask, maxbucket);
     703                 : 
     704                 :     /* Scan each page in bucket */
     705                 :     for (;;)
     706 GIC         173 :     {
     707 ECB             :         HashPageOpaque opaque;
     708                 :         OffsetNumber offno;
     709                 :         OffsetNumber maxoffno;
     710                 :         Buffer      next_buf;
     711                 :         Page        page;
     712                 :         OffsetNumber deletable[MaxOffsetNumber];
     713 GIC        1145 :         int         ndeletable = 0;
     714 CBC        1145 :         bool        retain_pin = false;
     715            1145 :         bool        clear_dead_marking = false;
     716 ECB             : 
     717 GIC        1145 :         vacuum_delay_point();
     718 ECB             : 
     719 GIC        1145 :         page = BufferGetPage(buf);
     720 CBC        1145 :         opaque = HashPageGetOpaque(page);
     721 ECB             : 
     722                 :         /* Scan each tuple in page */
     723 GIC        1145 :         maxoffno = PageGetMaxOffsetNumber(page);
     724 CBC        1145 :         for (offno = FirstOffsetNumber;
     725          224602 :              offno <= maxoffno;
     726          223457 :              offno = OffsetNumberNext(offno))
     727 ECB             :         {
     728                 :             ItemPointer htup;
     729                 :             IndexTuple  itup;
     730                 :             Bucket      bucket;
     731 GIC      223457 :             bool        kill_tuple = false;
     732 ECB             : 
     733 GIC      223457 :             itup = (IndexTuple) PageGetItem(page,
     734 ECB             :                                             PageGetItemId(page, offno));
     735 GIC      223457 :             htup = &(itup->t_tid);
     736 ECB             : 
     737                 :             /*
      738                 :              * To remove the dead tuples, we rely strictly on the results of
      739                 :              * the callback function; see btvacuumpage for the detailed reason.
     740                 :              */
     741 GIC      223457 :             if (callback && callback(htup, callback_state))
     742 ECB             :             {
     743 GIC        5846 :                 kill_tuple = true;
     744 CBC        5846 :                 if (tuples_removed)
     745            5846 :                     *tuples_removed += 1;
     746 ECB             :             }
     747 GIC      217611 :             else if (split_cleanup)
     748 ECB             :             {
     749                 :                 /* delete the tuples that are moved by split. */
      750                 :                 /* delete the tuples that were moved by a split */
     751 ECB             :                                               maxbucket,
     752                 :                                               highmask,
     753                 :                                               lowmask);
     754                 :                 /* mark the item for deletion */
     755 GIC      152611 :                 if (bucket != cur_bucket)
     756 ECB             :                 {
     757                 :                     /*
     758                 :                      * We expect tuples to either belong to current bucket or
      759                 :                      * We expect tuples to belong either to the current bucket
      760                 :                      * or to new_bucket.  This is ensured because we don't allow
      761                 :                      * further splits from a bucket that contains garbage.  See
     762                 :                      */
     763 GIC       62483 :                     Assert(bucket == new_bucket);
     764 CBC       62483 :                     kill_tuple = true;
     765 ECB             :                 }
     766                 :             }
     767                 : 
     768 GIC      223457 :             if (kill_tuple)
     769 ECB             :             {
     770                 :                 /* mark the item for deletion */
     771 GIC       68329 :                 deletable[ndeletable++] = offno;
     772 ECB             :             }
     773                 :             else
     774                 :             {
     775                 :                 /* we're keeping it, so count it */
     776 GIC      155128 :                 if (num_index_tuples)
     777 CBC       65000 :                     *num_index_tuples += 1;
     778 ECB             :             }
     779                 :         }
     780                 : 
     781                 :         /* retain the pin on primary bucket page till end of bucket scan */
     782 GIC        1145 :         if (blkno == bucket_blkno)
     783 CBC         972 :             retain_pin = true;
     784 ECB             :         else
     785 GIC         173 :             retain_pin = false;
     786 ECB             : 
     787 GIC        1145 :         blkno = opaque->hasho_nextblkno;
     788 ECB             : 
     789                 :         /*
     790                 :          * Apply deletions, advance to next page and write page if needed.
     791                 :          */
     792 GIC        1145 :         if (ndeletable > 0)
     793 ECB             :         {
     794                 :             /* No ereport(ERROR) until changes are logged */
     795 GIC         745 :             START_CRIT_SECTION();
     796 ECB             : 
     797 GIC         745 :             PageIndexMultiDelete(page, deletable, ndeletable);
     798 CBC         745 :             bucket_dirty = true;
     799 ECB             : 
     800                 :             /*
      801                 :              * Mark the page as clean if vacuum removed DEAD tuples from the
      802                 :              * index page.  We do this by clearing the
      803                 :              * LH_PAGE_HAS_DEAD_TUPLES flag.
     804                 :              */
     805 GIC         745 :             if (tuples_removed && *tuples_removed > 0 &&
     806 CBC          51 :                 H_HAS_DEAD_TUPLES(opaque))
     807 ECB             :             {
     808 UIC           0 :                 opaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
     809 UBC           0 :                 clear_dead_marking = true;
     810 EUB             :             }
     811                 : 
     812 GIC         745 :             MarkBufferDirty(buf);
     813 ECB             : 
     814                 :             /* XLOG stuff */
     815 GIC         745 :             if (RelationNeedsWAL(rel))
     816 ECB             :             {
     817                 :                 xl_hash_delete xlrec;
     818                 :                 XLogRecPtr  recptr;
     819                 : 
     820 GIC         619 :                 xlrec.clear_dead_marking = clear_dead_marking;
     821 CBC         619 :                 xlrec.is_primary_bucket_page = (buf == bucket_buf);
     822 ECB             : 
     823 GIC         619 :                 XLogBeginInsert();
     824 CBC         619 :                 XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
     825 ECB             : 
     826                 :                 /*
      827                 :                  * The bucket buffer needs to be registered to ensure that we
      828                 :                  * can acquire a cleanup lock on it during replay.
     829                 :                  */
     830 GIC         619 :                 if (!xlrec.is_primary_bucket_page)
     831 CBC          67 :                     XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
     832 ECB             : 
     833 GIC         619 :                 XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
     834 CBC         619 :                 XLogRegisterBufData(1, (char *) deletable,
     835 ECB             :                                     ndeletable * sizeof(OffsetNumber));
     836                 : 
     837 GIC         619 :                 recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE);
     838 CBC         619 :                 PageSetLSN(BufferGetPage(buf), recptr);
     839 ECB             :             }
     840                 : 
     841 GIC         745 :             END_CRIT_SECTION();
     842 ECB             :         }
     843                 : 
     844                 :         /* bail out if there are no more pages to scan. */
     845 GIC        1145 :         if (!BlockNumberIsValid(blkno))
     846 CBC         972 :             break;
     847 ECB             : 
     848 GIC         173 :         next_buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
     849 ECB             :                                               LH_OVERFLOW_PAGE,
     850                 :                                               bstrategy);
     851                 : 
     852                 :         /*
      853                 :          * Release the lock on the previous page only after acquiring the
      854                 :          * lock on the next page.
     855                 :          */
     856 GIC         173 :         if (retain_pin)
     857 CBC          32 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
     858 ECB             :         else
     859 GIC         141 :             _hash_relbuf(rel, buf);
     860 ECB             : 
     861 GIC         173 :         buf = next_buf;
     862 ECB             :     }
     863                 : 
     864                 :     /*
      865                 :      * Lock the bucket page to clear the garbage flag and squeeze the bucket.
      866                 :      * If the current buffer is the same as the bucket buffer, then we
      867                 :      * already hold a lock on the bucket page.
     868                 :      */
     869 GIC         972 :     if (buf != bucket_buf)
     870 ECB             :     {
     871 GIC          32 :         _hash_relbuf(rel, buf);
     872 CBC          32 :         LockBuffer(bucket_buf, BUFFER_LOCK_EXCLUSIVE);
     873 ECB             :     }
     874                 : 
     875                 :     /*
      876                 :      * Clear the garbage flag from the bucket after deleting the tuples
      877                 :      * that were moved by a split.  We purposefully clear the flag before
      878                 :      * squeezing the bucket, so that after a restart, vacuum won't again
      879                 :      * try to delete the moved-by-split tuples.
     880                 :      */
     881 GIC         972 :     if (split_cleanup)
     882 ECB             :     {
     883                 :         HashPageOpaque bucket_opaque;
     884                 :         Page        page;
     885                 : 
     886 GIC         666 :         page = BufferGetPage(bucket_buf);
     887 CBC         666 :         bucket_opaque = HashPageGetOpaque(page);
     888 ECB             : 
     889                 :         /* No ereport(ERROR) until changes are logged */
     890 GIC         666 :         START_CRIT_SECTION();
     891 ECB             : 
     892 GIC         666 :         bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP;
     893 CBC         666 :         MarkBufferDirty(bucket_buf);
     894 ECB             : 
     895                 :         /* XLOG stuff */
     896 GIC         666 :         if (RelationNeedsWAL(rel))
     897 ECB             :         {
     898                 :             XLogRecPtr  recptr;
     899                 : 
     900 GIC         540 :             XLogBeginInsert();
     901 CBC         540 :             XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD);
     902 ECB             : 
     903 GIC         540 :             recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP);
     904 CBC         540 :             PageSetLSN(page, recptr);
     905 ECB             :         }
     906                 : 
     907 GIC         666 :         END_CRIT_SECTION();
     908 ECB             :     }
     909                 : 
     910                 :     /*
     911                 :      * If we have deleted anything, try to compact free space.  For squeezing
      912                 :      * the bucket, we must have a cleanup lock; otherwise the squeeze could
      913                 :      * disturb the ordering of tuples seen by a scan that started earlier.
     914                 :      */
     915 GIC         972 :     if (bucket_dirty && IsBufferCleanupOK(bucket_buf))
     916 CBC         684 :         _hash_squeezebucket(rel, cur_bucket, bucket_blkno, bucket_buf,
     917 ECB             :                             bstrategy);
     918                 :     else
     919 GIC         288 :         LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK);
     920 CBC         972 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a