LCOV - differential code coverage report

Current view:   top level - src/backend/access/hash - hashovfl.c (source / functions)
Current:        Differential Code Coverage HEAD vs 15
Current Date:   2023-04-08 17:13:01
Baseline:       15
Baseline Date:  2023-04-08 15:09:40
Legend:         Lines: hit / not hit

                               Coverage   Total   Hit   UBC   GNC   CBC   DCB
Lines:                           84.3 %     382   322    60     1   321     1
Functions:                      100.0 %       7     7           1     6
Line coverage date bins:
  (240..) days:                  84.3 %     382   322    60     1   321
Function coverage date bins:
  (240..) days:                 100.0 %       7     7           1     6

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * hashovfl.c
                                  4                 :  *    Overflow page management code for the Postgres hash access method
                                  5                 :  *
                                  6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  7                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :  *
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/access/hash/hashovfl.c
                                 12                 :  *
                                 13                 :  * NOTES
                                 14                 :  *    Overflow pages look like ordinary relation pages.
                                 15                 :  *
                                 16                 :  *-------------------------------------------------------------------------
                                 17                 :  */
                                 18                 : #include "postgres.h"
                                 19                 : 
                                 20                 : #include "access/hash.h"
                                 21                 : #include "access/hash_xlog.h"
                                 22                 : #include "access/xloginsert.h"
                                 23                 : #include "miscadmin.h"
                                 24                 : #include "utils/rel.h"
                                 25                 : 
                                 26                 : 
                                 27                 : static uint32 _hash_firstfreebit(uint32 map);
                                 28                 : 
                                 29                 : 
                                 30                 : /*
                                 31                 :  * Convert overflow page bit number (its index in the free-page bitmaps)
                                 32                 :  * to block number within the index.
                                 33                 :  */
                                 34                 : static BlockNumber
 7160 tgl                        35 CBC         161 : bitno_to_blkno(HashMetaPage metap, uint32 ovflbitnum)
                                 36                 : {
                                 37             161 :     uint32      splitnum = metap->hashm_ovflpoint;
                                 38                 :     uint32      i;
                                 39                 : 
                                 40                 :     /* Convert zero-based bitnumber to 1-based page number */
                                 41             161 :     ovflbitnum += 1;
                                 42                 : 
                                 43                 :     /* Determine the split number for this page (must be >= 1) */
                                 44             161 :     for (i = 1;
                                 45             975 :          i < splitnum && ovflbitnum > metap->hashm_spares[i];
                                 46             814 :          i++)
                                 47                 :          /* loop */ ;
                                 48                 : 
                                 49                 :     /*
                                 50                 :      * Convert to absolute page number by adding the number of bucket pages
                                 51                 :      * that exist before this split point.
                                 52                 :      */
 2197 rhaas                      53             161 :     return (BlockNumber) (_hash_get_totalbuckets(i) + ovflbitnum);
                                 54                 : }
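
To make the arithmetic above concrete, here is a minimal standalone sketch (not PostgreSQL code) of the same bit-number-to-block-number conversion. The names toy_bitno_to_blkno, totalbuckets and the spares[] values are invented for illustration; totalbuckets() is simplified to 1 << i, which matches _hash_get_totalbuckets() only for the early, single-phase split points.

    /*
     * Toy model of bitno_to_blkno() above.  spares[i] plays the role of
     * metap->hashm_spares[i]: the cumulative count of overflow pages
     * allocated up to and including split point i.
     */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t totalbuckets(uint32_t i)    /* simplified stand-in */
    {
        return (uint32_t) 1 << i;
    }

    static uint32_t toy_bitno_to_blkno(const uint32_t *spares, uint32_t splitnum,
                                       uint32_t ovflbitnum)
    {
        uint32_t    i;

        ovflbitnum += 1;        /* 0-based bit number -> 1-based page number */
        for (i = 1; i < splitnum && ovflbitnum > spares[i]; i++)
            ;                   /* find the split point this page belongs to */
        return totalbuckets(i) + ovflbitnum;
    }

    int main(void)
    {
        uint32_t    spares[] = {0, 1, 3, 6};    /* made-up cumulative counts */

        /* bit 4 falls in split point 3, so block = 8 + 5 = 13 */
        printf("%u\n", (unsigned) toy_bitno_to_blkno(spares, 3, 4));
        return 0;
    }
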
                                 55                 : 
                                 56                 : /*
                                 57                 :  * _hash_ovflblkno_to_bitno
                                 58                 :  *
                                 59                 :  * Convert overflow page block number to bit number for free-page bitmap.
                                 60                 :  */
                                 61                 : uint32
 2257                            62              69 : _hash_ovflblkno_to_bitno(HashMetaPage metap, BlockNumber ovflblkno)
                                 63                 : {
 7160 tgl                        64              69 :     uint32      splitnum = metap->hashm_ovflpoint;
                                 65                 :     uint32      i;
                                 66                 :     uint32      bitnum;
                                 67                 : 
                                 68                 :     /* Determine the split number containing this page */
                                 69             270 :     for (i = 1; i <= splitnum; i++)
                                 70                 :     {
 2197 rhaas                      71             270 :         if (ovflblkno <= (BlockNumber) _hash_get_totalbuckets(i))
 7160 tgl                        72               4 :             break;              /* oops */
 2197 rhaas                      73             266 :         bitnum = ovflblkno - _hash_get_totalbuckets(i);
                                 74                 : 
                                 75                 :         /*
                                 76                 :          * bitnum has to be greater than the number of overflow pages added
                                 77                 :          * in the previous split point.  The overflow pages at this split
                                 78                 :          * point (i), if any, start from (_hash_get_totalbuckets(i) +
                                 79                 :          * metap->hashm_spares[i - 1] + 1).
                                 80                 :          */
 2250                            81             266 :         if (bitnum > metap->hashm_spares[i - 1] &&
                                 82             266 :             bitnum <= metap->hashm_spares[i])
 7160 tgl                        83              65 :             return bitnum - 1;  /* -1 to convert 1-based to 0-based */
                                 84                 :     }
                                 85                 : 
 2250 rhaas                      86               4 :     ereport(ERROR,
                                 87                 :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                 88                 :              errmsg("invalid overflow block number %u", ovflblkno)));
                                 89                 :     return 0;                   /* keep compiler quiet */
                                 90                 : }
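
Continuing the toy model introduced after bitno_to_blkno(), a sketch of the inverse conversion; toy_blkno_to_bitno is an invented name and reuses the simplified totalbuckets() and spares[] from that sketch. With those values, toy_blkno_to_bitno(spares, 3, 13) returns 4, round-tripping the earlier example.

    /*
     * Toy inverse: block number back to a 0-based bit number, mirroring the
     * loop in _hash_ovflblkno_to_bitno() above (not PostgreSQL code).
     */
    static uint32_t toy_blkno_to_bitno(const uint32_t *spares, uint32_t splitnum,
                                       uint32_t blkno)
    {
        uint32_t    i;

        for (i = 1; i <= splitnum; i++)
        {
            uint32_t    bitnum;

            if (blkno <= totalbuckets(i))
                break;          /* blkno is not an overflow page here */
            bitnum = blkno - totalbuckets(i);
            if (bitnum > spares[i - 1] && bitnum <= spares[i])
                return bitnum - 1;      /* back to 0-based */
        }
        return (uint32_t) -1;   /* not a valid overflow block in this toy */
    }
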
                                 91                 : 
                                 92                 : /*
                                 93                 :  *  _hash_addovflpage
                                 94                 :  *
                                 95                 :  *  Add an overflow page to the bucket whose last page is pointed to by 'buf'.
                                 96                 :  *
                                  97                 :  *  On entry, the caller must hold a pin but no lock on 'buf'.  Unless asked
                                  98                 :  *  to retain it, the pin is dropped before exiting (we assume the caller is
                                  99                 :  *  not interested in 'buf' anymore).  The pin will be retained only for the
                                 100                 :  *  primary bucket page.  The returned overflow page will be pinned and
                                 101                 :  *  write-locked; it is guaranteed to be empty.
                                102                 :  *
                                103                 :  *  The caller must hold a pin, but no lock, on the metapage buffer.
                                104                 :  *  That buffer is returned in the same state.
                                105                 :  *
                                106                 :  * NB: since this could be executed concurrently by multiple processes,
                                107                 :  * one should not assume that the returned overflow page will be the
                                108                 :  * immediate successor of the originally passed 'buf'.  Additional overflow
                                109                 :  * pages might have been added to the bucket chain in between.
                                110                 :  */
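
A hypothetical caller shape implied by the contract above, for orientation only; everything except _hash_addovflpage and _hash_relbuf is invented, it assumes the backend headers already included at the top of this file, and real callers in the insert and split paths differ in detail.

    /*
     * Sketch: the caller arrives with pins (no locks) on the bucket tail page
     * and on the metapage, and receives a pinned, write-locked, empty overflow
     * page; with retain_pin = false its pin on 'tailbuf' is dropped inside.
     */
    static void add_one_overflow_page(Relation rel, Buffer metabuf, Buffer tailbuf)
    {
        Buffer      ovflbuf;

        ovflbuf = _hash_addovflpage(rel, metabuf, tailbuf, false);

        /* ... place tuples on ovflbuf here ... */

        _hash_relbuf(rel, ovflbuf);     /* release lock and pin when done */
    }
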
                                111                 : Buffer
 2321                           112             161 : _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin)
                                113                 : {
                                114                 :     Buffer      ovflbuf;
                                115                 :     Page        page;
                                116                 :     Page        ovflpage;
                                117                 :     HashPageOpaque pageopaque;
                                118                 :     HashPageOpaque ovflopaque;
                                119                 :     HashMetaPage metap;
 2232                           120             161 :     Buffer      mapbuf = InvalidBuffer;
                                121             161 :     Buffer      newmapbuf = InvalidBuffer;
                                122                 :     BlockNumber blkno;
                                123                 :     uint32      orig_firstfree;
                                124                 :     uint32      splitnum;
                                125             161 :     uint32     *freep = NULL;
                                126                 :     uint32      max_ovflpg;
                                127                 :     uint32      bit;
                                128                 :     uint32      bitmap_page_bit;
                                129                 :     uint32      first_page;
                                130                 :     uint32      last_bit;
                                131                 :     uint32      last_page;
                                132                 :     uint32      i,
                                133                 :                 j;
                                134             161 :     bool        page_found = false;
                                135                 : 
                                136                 :     /*
                                 137                 :      * Write-lock the tail page.  Here, we need to maintain a locking order:
                                 138                 :      * first acquire the lock on the tail page of the bucket, then on the meta
                                 139                 :      * page to find and lock the bitmap page; once that page is found, the
                                 140                 :      * lock on the meta page is released, and finally the lock on the new
                                 141                 :      * overflow buffer is acquired.  We need this locking order to avoid
                                 142                 :      * deadlock with backends that are doing inserts.
                                143                 :      *
                                144                 :      * Note: We could have avoided locking many buffers here if we made two
                                145                 :      * WAL records for acquiring an overflow page (one to allocate an overflow
                                146                 :      * page and another to add it to overflow bucket chain).  However, doing
                                147                 :      * so can leak an overflow page, if the system crashes after allocation.
                                148                 :      * Needless to say, it is better to have a single record from a
                                149                 :      * performance point of view as well.
                                150                 :      */
 2298                           151             161 :     LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
                                152                 : 
                                153                 :     /* probably redundant... */
 5820 tgl                       154             161 :     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
                                155                 : 
                                156                 :     /* loop to find current tail page, in case someone else inserted too */
                                157                 :     for (;;)
 7157 tgl                       158 UBC           0 :     {
                                159                 :         BlockNumber nextblkno;
                                160                 : 
 2545 kgrittn                   161 CBC         161 :         page = BufferGetPage(buf);
  373 michael                   162             161 :         pageopaque = HashPageGetOpaque(page);
 7157 tgl                       163             161 :         nextblkno = pageopaque->hasho_nextblkno;
                                164                 : 
                                165             161 :         if (!BlockNumberIsValid(nextblkno))
                                166             161 :             break;
                                167                 : 
                                168                 :         /* we assume we do not need to write the unmodified page */
 2280 rhaas                     169 UBC           0 :         if (retain_pin)
                                170                 :         {
                                171                 :             /* pin will be retained only for the primary bucket page */
 2186 tgl                       172               0 :             Assert((pageopaque->hasho_flag & LH_PAGE_TYPE) == LH_BUCKET_PAGE);
 2298 rhaas                     173               0 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                                174                 :         }
                                175                 :         else
 2321                           176               0 :             _hash_relbuf(rel, buf);
                                177                 : 
 2280                           178               0 :         retain_pin = false;
                                179                 : 
 5820 tgl                       180               0 :         buf = _hash_getbuf(rel, nextblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
                                181                 :     }
                                182                 : 
                                183                 :     /* Get exclusive lock on the meta page */
 2298 rhaas                     184 CBC         161 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
                                185                 : 
 6363 tgl                       186             161 :     _hash_checkpage(rel, metabuf, LH_META_PAGE);
 2545 kgrittn                   187             161 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
                                188                 : 
                                189                 :     /* start search at hashm_firstfree */
 7157 tgl                       190             161 :     orig_firstfree = metap->hashm_firstfree;
                                191             161 :     first_page = orig_firstfree >> BMPG_SHIFT(metap);
                                192             161 :     bit = orig_firstfree & BMPG_MASK(metap);
                                193             161 :     i = first_page;
 7160                           194             161 :     j = bit / BITS_PER_MAP;
                                195             161 :     bit &= ~(BITS_PER_MAP - 1);
                                196                 : 
                                197                 :     /* outer loop iterates once per bitmap page */
                                198                 :     for (;;)
 9345 bruce                     199             126 :     {
                                200                 :         BlockNumber mapblkno;
                                201                 :         Page        mappage;
                                202                 :         uint32      last_inpage;
                                203                 : 
                                204                 :         /* want to end search with the last existing overflow page */
 7157 tgl                       205             287 :         splitnum = metap->hashm_ovflpoint;
                                206             287 :         max_ovflpg = metap->hashm_spares[splitnum] - 1;
                                207             287 :         last_page = max_ovflpg >> BMPG_SHIFT(metap);
                                208             287 :         last_bit = max_ovflpg & BMPG_MASK(metap);
                                209                 : 
                                210             287 :         if (i > last_page)
                                211             126 :             break;
                                212                 : 
                                213             161 :         Assert(i < metap->hashm_nmaps);
                                214             161 :         mapblkno = metap->hashm_mapp[i];
                                215                 : 
 7160                           216             161 :         if (i == last_page)
                                217             161 :             last_inpage = last_bit;
                                218                 :         else
 7160 tgl                       219 UBC           0 :             last_inpage = BMPGSZ_BIT(metap) - 1;
                                220                 : 
                                221                 :         /* Release exclusive lock on metapage while reading bitmap page */
 2298 rhaas                     222 CBC         161 :         LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
                                223                 : 
 5820 tgl                       224             161 :         mapbuf = _hash_getbuf(rel, mapblkno, HASH_WRITE, LH_BITMAP_PAGE);
 2545 kgrittn                   225             161 :         mappage = BufferGetPage(mapbuf);
 7157 tgl                       226             161 :         freep = HashPageGetBitmap(mappage);
                                227                 : 
 7160                           228             287 :         for (; bit <= last_inpage; j++, bit += BITS_PER_MAP)
                                229                 :         {
 9345 bruce                     230             161 :             if (freep[j] != ALL_SET)
                                231                 :             {
 2232 rhaas                     232              35 :                 page_found = true;
                                233                 : 
                                234                 :                 /* Reacquire exclusive lock on the meta page */
                                235              35 :                 LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
                                236                 : 
                                237                 :                 /* convert bit to bit number within page */
                                238              35 :                 bit += _hash_firstfreebit(freep[j]);
                                239              35 :                 bitmap_page_bit = bit;
                                240                 : 
                                241                 :                 /* convert bit to absolute bit number */
                                242              35 :                 bit += (i << BMPG_SHIFT(metap));
                                243                 :                 /* Calculate address of the recycled overflow page */
                                244              35 :                 blkno = bitno_to_blkno(metap, bit);
                                245                 : 
                                246                 :                 /* Fetch and init the recycled page */
                                247              35 :                 ovflbuf = _hash_getinitbuf(rel, blkno);
                                248                 : 
 9345 bruce                     249              35 :                 goto found;
                                250                 :             }
                                251                 :         }
                                252                 : 
                                253                 :         /* No free space here, try to advance to next map page */
 7157 tgl                       254             126 :         _hash_relbuf(rel, mapbuf);
 2232 rhaas                     255             126 :         mapbuf = InvalidBuffer;
 7157 tgl                       256             126 :         i++;
                                257             126 :         j = 0;                  /* scan from start of next map page */
                                258             126 :         bit = 0;
                                259                 : 
                                260                 :         /* Reacquire exclusive lock on the meta page */
 2298 rhaas                     261             126 :         LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
                                262                 :     }
                                263                 : 
                                264                 :     /*
                                265                 :      * No free pages --- have to extend the relation to add an overflow page.
                                266                 :      * First, check to see if we have to add a new bitmap page too.
                                267                 :      */
 7160 tgl                       268             126 :     if (last_bit == (uint32) (BMPGSZ_BIT(metap) - 1))
                                269                 :     {
                                270                 :         /*
                                271                 :          * We create the new bitmap page with all pages marked "in use".
                                272                 :          * Actually two pages in the new bitmap's range will exist
                                273                 :          * immediately: the bitmap page itself, and the following page which
                                274                 :          * is the one we return to the caller.  Both of these are correctly
                                275                 :          * marked "in use".  Subsequent pages do not exist yet, but it is
                                276                 :          * convenient to pre-mark them as "in use" too.
                                277                 :          */
 7160 tgl                       278 UBC           0 :         bit = metap->hashm_spares[splitnum];
                                279                 : 
                                280                 :         /* metapage already has a write lock */
 2232 rhaas                     281               0 :         if (metap->hashm_nmaps >= HASH_MAX_BITMAPS)
                                282               0 :             ereport(ERROR,
                                283                 :                     (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                284                 :                      errmsg("out of overflow pages in hash index \"%s\"",
                                285                 :                             RelationGetRelationName(rel))));
                                286                 : 
                                287               0 :         newmapbuf = _hash_getnewbuf(rel, bitno_to_blkno(metap, bit), MAIN_FORKNUM);
                                288                 :     }
                                289                 :     else
                                290                 :     {
                                291                 :         /*
                                292                 :          * Nothing to do here; since the page will be past the last used page,
                                293                 :          * we know its bitmap bit was preinitialized to "in use".
                                294                 :          */
                                295                 :     }
                                296                 : 
                                297                 :     /* Calculate address of the new overflow page */
 2232 rhaas                     298 CBC         126 :     bit = BufferIsValid(newmapbuf) ?
                                299             126 :         metap->hashm_spares[splitnum] + 1 : metap->hashm_spares[splitnum];
 7160 tgl                       300             126 :     blkno = bitno_to_blkno(metap, bit);
                                301                 : 
                                302                 :     /*
                                303                 :      * Fetch the page with _hash_getnewbuf to ensure smgr's idea of the
                                304                 :      * relation length stays in sync with ours.  XXX It's annoying to do this
                                305                 :      * with metapage write lock held; would be better to use a lock that
                                306                 :      * doesn't block incoming searches.
                                307                 :      *
                                308                 :      * It is okay to hold two buffer locks here (one on tail page of bucket
                                309                 :      * and other on new overflow page) since there cannot be anyone else
                                310                 :      * contending for access to ovflbuf.
                                311                 :      */
 2232 rhaas                     312             126 :     ovflbuf = _hash_getnewbuf(rel, blkno, MAIN_FORKNUM);
                                313                 : 
                                314             161 : found:
                                315                 : 
                                316                 :     /*
                                 317                 :      * Do the update.  No ereport(ERROR) until changes are logged.  We want to
                                 318                 :      * log the changes for the bitmap page and the overflow page together, so
                                 319                 :      * that a newly added page cannot be lost if the system crashes mid-way.
                                320                 :      */
 2217                           321             161 :     START_CRIT_SECTION();
                                322                 : 
 2232                           323             161 :     if (page_found)
                                324                 :     {
                                325              35 :         Assert(BufferIsValid(mapbuf));
                                326                 : 
                                327                 :         /* mark page "in use" in the bitmap */
                                328              35 :         SETBIT(freep, bitmap_page_bit);
                                329              35 :         MarkBufferDirty(mapbuf);
                                330                 :     }
                                331                 :     else
                                332                 :     {
                                333                 :         /* update the count to indicate new overflow page is added */
                                334             126 :         metap->hashm_spares[splitnum]++;
                                335                 : 
                                336             126 :         if (BufferIsValid(newmapbuf))
                                337                 :         {
 2232 rhaas                     338 UBC           0 :             _hash_initbitmapbuffer(newmapbuf, metap->hashm_bmsize, false);
                                339               0 :             MarkBufferDirty(newmapbuf);
                                340                 : 
                                341                 :             /* add the new bitmap page to the metapage's list of bitmaps */
                                342               0 :             metap->hashm_mapp[metap->hashm_nmaps] = BufferGetBlockNumber(newmapbuf);
                                343               0 :             metap->hashm_nmaps++;
                                344               0 :             metap->hashm_spares[splitnum]++;
                                345                 :         }
                                346                 : 
 1893 rhaas                     347 CBC         126 :         MarkBufferDirty(metabuf);
                                348                 : 
                                349                 :         /*
                                350                 :          * for new overflow page, we don't need to explicitly set the bit in
                                351                 :          * bitmap page, as by default that will be set to "in use".
                                352                 :          */
                                353                 :     }
                                354                 : 
                                355                 :     /*
                                356                 :      * Adjust hashm_firstfree to avoid redundant searches.  But don't risk
                                357                 :      * changing it if someone moved it while we were searching bitmap pages.
                                358                 :      */
 7157 tgl                       359             161 :     if (metap->hashm_firstfree == orig_firstfree)
                                360                 :     {
                                361             161 :         metap->hashm_firstfree = bit + 1;
 2298 rhaas                     362             161 :         MarkBufferDirty(metabuf);
                                363                 :     }
                                364                 : 
                                365                 :     /* initialize new overflow page */
 2232                           366             161 :     ovflpage = BufferGetPage(ovflbuf);
  373 michael                   367             161 :     ovflopaque = HashPageGetOpaque(ovflpage);
 2232 rhaas                     368             161 :     ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
                                369             161 :     ovflopaque->hasho_nextblkno = InvalidBlockNumber;
                                370             161 :     ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
                                371             161 :     ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
                                372             161 :     ovflopaque->hasho_page_id = HASHO_PAGE_ID;
                                373                 : 
                                374             161 :     MarkBufferDirty(ovflbuf);
                                375                 : 
                                376                 :     /* logically chain overflow page to previous page */
                                377             161 :     pageopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
                                378                 : 
                                379             161 :     MarkBufferDirty(buf);
                                380                 : 
                                381                 :     /* XLOG stuff */
 2217                           382             161 :     if (RelationNeedsWAL(rel))
                                383                 :     {
                                384                 :         XLogRecPtr  recptr;
                                385                 :         xl_hash_add_ovfl_page xlrec;
                                386                 : 
                                387             147 :         xlrec.bmpage_found = page_found;
                                388             147 :         xlrec.bmsize = metap->hashm_bmsize;
                                389                 : 
                                390             147 :         XLogBeginInsert();
                                391             147 :         XLogRegisterData((char *) &xlrec, SizeOfHashAddOvflPage);
                                392                 : 
                                393             147 :         XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT);
                                394             147 :         XLogRegisterBufData(0, (char *) &pageopaque->hasho_bucket, sizeof(Bucket));
                                395                 : 
                                396             147 :         XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
                                397                 : 
                                398             147 :         if (BufferIsValid(mapbuf))
                                399                 :         {
                                400              35 :             XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD);
                                401              35 :             XLogRegisterBufData(2, (char *) &bitmap_page_bit, sizeof(uint32));
                                402                 :         }
                                403                 : 
                                404             147 :         if (BufferIsValid(newmapbuf))
 2217 rhaas                     405 UBC           0 :             XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT);
                                406                 : 
 2217 rhaas                     407 CBC         147 :         XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD);
                                408             147 :         XLogRegisterBufData(4, (char *) &metap->hashm_firstfree, sizeof(uint32));
                                409                 : 
                                410             147 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE);
                                411                 : 
                                412             147 :         PageSetLSN(BufferGetPage(ovflbuf), recptr);
                                413             147 :         PageSetLSN(BufferGetPage(buf), recptr);
                                414                 : 
                                415             147 :         if (BufferIsValid(mapbuf))
                                416              35 :             PageSetLSN(BufferGetPage(mapbuf), recptr);
                                417                 : 
                                418             147 :         if (BufferIsValid(newmapbuf))
 2217 rhaas                     419 UBC           0 :             PageSetLSN(BufferGetPage(newmapbuf), recptr);
                                420                 : 
 2217 rhaas                     421 CBC         147 :         PageSetLSN(BufferGetPage(metabuf), recptr);
                                422                 :     }
                                423                 : 
                                424             161 :     END_CRIT_SECTION();
                                425                 : 
 2232                           426             161 :     if (retain_pin)
                                427              47 :         LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                                428                 :     else
                                429             114 :         _hash_relbuf(rel, buf);
                                430                 : 
                                431             161 :     if (BufferIsValid(mapbuf))
                                432              35 :         _hash_relbuf(rel, mapbuf);
                                433                 : 
                                434             161 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
                                435                 : 
                                436             161 :     if (BufferIsValid(newmapbuf))
 2232 rhaas                     437 UBC           0 :         _hash_relbuf(rel, newmapbuf);
                                438                 : 
 2232 rhaas                     439 CBC         161 :     return ovflbuf;
                                440                 : }
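
The search above starts from the hashm_firstfree hint and splits each candidate bit number into a bitmap page, a 32-bit map word, and a bit within that word, using BMPG_SHIFT, BMPG_MASK and BITS_PER_MAP. The following standalone sketch shows that decomposition with invented TOY_* constants; the real shift and mask depend on the bitmap size recorded in the metapage, so the values here are illustrative only.

    #include <stdint.h>

    #define TOY_BMPG_SHIFT   15     /* made up: 2^15 bits per bitmap page */
    #define TOY_BMPG_MASK    ((1U << TOY_BMPG_SHIFT) - 1)
    #define TOY_BITS_PER_MAP 32     /* bits per uint32 map word */

    /* Split an overflow-page bit number into (bitmap page, map word, bit). */
    static void toy_decompose_bit(uint32_t bitno, uint32_t *map_page,
                                  uint32_t *map_word, uint32_t *bit_in_word)
    {
        uint32_t    bit_in_page = bitno & TOY_BMPG_MASK;

        *map_page = bitno >> TOY_BMPG_SHIFT;            /* which bitmap page */
        *map_word = bit_in_page / TOY_BITS_PER_MAP;     /* which uint32 word */
        *bit_in_word = bit_in_page % TOY_BITS_PER_MAP;  /* bit within word */
    }
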
                                441                 : 
                                442                 : /*
                                443                 :  *  _hash_firstfreebit()
                                444                 :  *
                                445                 :  *  Return the number of the first bit that is not set in the word 'map'.
                                446                 :  */
                                447                 : static uint32
 9770 scrappy                   448              35 : _hash_firstfreebit(uint32 map)
                                449                 : {
                                450                 :     uint32      i,
                                451                 :                 mask;
                                452                 : 
 9345 bruce                     453              35 :     mask = 0x1;
                                454             289 :     for (i = 0; i < BITS_PER_MAP; i++)
                                455                 :     {
                                456             289 :         if (!(mask & map))
 8986                           457              35 :             return i;
 7160 tgl                       458             254 :         mask <<= 1;
                                459                 :     }
                                460                 : 
 7157 tgl                       461 UBC           0 :     elog(ERROR, "firstfreebit found no free bit");
                                462                 : 
                                463                 :     return 0;                   /* keep compiler quiet */
                                464                 : }
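
As an aside, the same result can be obtained without a loop on compilers that provide a count-trailing-zeros builtin; this is not how the file implements it, just a sketch of an equivalent. firstfreebit_ctz is an invented name, and __builtin_ctz is the GCC/Clang intrinsic.

    #include <stdint.h>

    /*
     * The lowest clear bit of 'map' is the lowest set bit of ~map.  Guard the
     * ALL_SET case first, since __builtin_ctz(0) is undefined.
     */
    static uint32_t firstfreebit_ctz(uint32_t map)
    {
        if (map == UINT32_MAX)
            return 32;          /* no free bit in this word */
        return (uint32_t) __builtin_ctz(~map);
    }
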
                                465                 : 
                                466                 : /*
                                467                 :  *  _hash_freeovflpage() -
                                468                 :  *
                                469                 :  *  Remove this overflow page from its bucket's chain, and mark the page as
                                470                 :  *  free.  On entry, ovflbuf is write-locked; it is released before exiting.
                                471                 :  *
                                 472                 :  *  Add the tuples (itups) to wbuf in this function.  We could do that in the
                                 473                 :  *  caller as well, but doing it here lets us easily write the WAL record for
                                 474                 :  *  the XLOG_HASH_SQUEEZE_PAGE operation.  Adding the tuples and removing the
                                 475                 :  *  overflow page have to be done as a single atomic operation; otherwise,
                                 476                 :  *  during replay on a standby, users might find duplicate records.
                                477                 :  *
                                478                 :  *  Since this function is invoked in VACUUM, we provide an access strategy
                                479                 :  *  parameter that controls fetches of the bucket pages.
                                480                 :  *
                                481                 :  *  Returns the block number of the page that followed the given page
                                482                 :  *  in the bucket, or InvalidBlockNumber if no following page.
                                483                 :  *
                                 484                 :  *  NB: the caller must not hold a lock on the metapage, nor on the page that
                                 485                 :  *  follows ovflbuf in the bucket chain.  We don't acquire a lock on the page
                                 486                 :  *  preceding ovflbuf in the chain if it is the same as wbuf, because the
                                 487                 :  *  caller already holds a lock on it.
                                488                 :  */
                                489                 : BlockNumber
 2232 rhaas                     490 CBC          65 : _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
                                491                 :                    Buffer wbuf, IndexTuple *itups, OffsetNumber *itup_offsets,
                                492                 :                    Size *tups_size, uint16 nitups,
                                493                 :                    BufferAccessStrategy bstrategy)
                                494                 : {
                                495                 :     HashMetaPage metap;
                                496                 :     Buffer      metabuf;
                                497                 :     Buffer      mapbuf;
                                498                 :     BlockNumber ovflblkno;
                                499                 :     BlockNumber prevblkno;
                                500                 :     BlockNumber blkno;
                                501                 :     BlockNumber nextblkno;
                                502                 :     BlockNumber writeblkno;
                                503                 :     HashPageOpaque ovflopaque;
                                504                 :     Page        ovflpage;
                                505                 :     Page        mappage;
                                506                 :     uint32     *freep;
                                507                 :     uint32      ovflbitno;
                                508                 :     int32       bitmappage,
                                509                 :                 bitmapbit;
                                510                 :     Bucket      bucket PG_USED_FOR_ASSERTS_ONLY;
 2217                           511              65 :     Buffer      prevbuf = InvalidBuffer;
                                512              65 :     Buffer      nextbuf = InvalidBuffer;
                                513              65 :     bool        update_metap = false;
                                514                 : 
                                515                 :     /* Get information from the doomed page */
 6363 tgl                       516              65 :     _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE);
 6797 bruce                     517              65 :     ovflblkno = BufferGetBlockNumber(ovflbuf);
 2545 kgrittn                   518              65 :     ovflpage = BufferGetPage(ovflbuf);
  373 michael                   519              65 :     ovflopaque = HashPageGetOpaque(ovflpage);
 9345 bruce                     520              65 :     nextblkno = ovflopaque->hasho_nextblkno;
                                521              65 :     prevblkno = ovflopaque->hasho_prevblkno;
 2321 rhaas                     522              65 :     writeblkno = BufferGetBlockNumber(wbuf);
 9345 bruce                     523              65 :     bucket = ovflopaque->hasho_bucket;
                                524                 : 
                                525                 :     /*
                                 526                 :      * Fix up the bucket chain.  This is a doubly-linked list, so we must fix
                                527                 :      * up the bucket chain members behind and ahead of the overflow page being
                                528                 :      * deleted.  Concurrency issues are avoided by using lock chaining as
                                529                 :      * described atop hashbucketcleanup.
                                530                 :      */
                                531              65 :     if (BlockNumberIsValid(prevblkno))
                                532                 :     {
 2321 rhaas                     533              65 :         if (prevblkno == writeblkno)
                                534              26 :             prevbuf = wbuf;
                                535                 :         else
                                536              39 :             prevbuf = _hash_getbuf_with_strategy(rel,
                                537                 :                                                  prevblkno,
                                538                 :                                                  HASH_WRITE,
                                539                 :                                                  LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
                                540                 :                                                  bstrategy);
                                541                 :     }
 9345 bruce                     542              65 :     if (BlockNumberIsValid(nextblkno))
 2232 rhaas                     543 UBC           0 :         nextbuf = _hash_getbuf_with_strategy(rel,
                                544                 :                                              nextblkno,
                                545                 :                                              HASH_WRITE,
                                546                 :                                              LH_OVERFLOW_PAGE,
                                547                 :                                              bstrategy);
                                548                 : 
                                549                 :     /* Note: bstrategy is intentionally not used for metapage and bitmap */
                                550                 : 
                                551                 :     /* Read the metapage so we can determine which bitmap page to use */
 5820 tgl                       552 CBC          65 :     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
 2545 kgrittn                   553              65 :     metap = HashPageGetMeta(BufferGetPage(metabuf));
                                554                 : 
                                555                 :     /* Identify which bit to set */
 2257 rhaas                     556              65 :     ovflbitno = _hash_ovflblkno_to_bitno(metap, ovflblkno);
                                557                 : 
 7160 tgl                       558              65 :     bitmappage = ovflbitno >> BMPG_SHIFT(metap);
                                559              65 :     bitmapbit = ovflbitno & BMPG_MASK(metap);
                                560                 : 
                                561              65 :     if (bitmappage >= metap->hashm_nmaps)
 7160 tgl                       562 UBC           0 :         elog(ERROR, "invalid overflow bit number %u", ovflbitno);
 9345 bruce                     563 CBC          65 :     blkno = metap->hashm_mapp[bitmappage];
                                564                 : 
                                565                 :     /* Release metapage lock while we access the bitmap page */
 2298 rhaas                     566              65 :     LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
                                567                 : 
                                568                 :     /* read the bitmap page to clear the bitmap bit */
 5820 tgl                       569              65 :     mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_BITMAP_PAGE);
 2545 kgrittn                   570              65 :     mappage = BufferGetPage(mapbuf);
 9345 bruce                     571              65 :     freep = HashPageGetBitmap(mappage);
 7157 tgl                       572              65 :     Assert(ISSET(freep, bitmapbit));
                                573                 : 
                                574                 :     /* Get write-lock on metapage to update firstfree */
 2298 rhaas                     575              65 :     LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);
                                576                 : 
                                577                 :     /* This operation needs to log multiple tuples, prepare WAL for that */
 2217                           578              65 :     if (RelationNeedsWAL(rel))
                                579              65 :         XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups);
                                580                 : 
                                581              65 :     START_CRIT_SECTION();
                                582                 : 
                                583                 :     /*
                                584                 :      * we have to insert tuples on the "write" page, being careful to preserve
                                585                 :      * hashkey ordering.  (If we insert many tuples into the same "write" page
                                586                 :      * it would be worth qsort'ing them).
                                587                 :      */
 2232                           588              65 :     if (nitups > 0)
                                589                 :     {
                                590              29 :         _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
                                591              29 :         MarkBufferDirty(wbuf);
                                592                 :     }
                                593                 : 
                                594                 :     /*
                                595                 :      * Reinitialize the freed overflow page.  Just zeroing the page won't
                                596                 :      * work, because WAL replay routines expect pages to be initialized. See
                                597                 :      * explanation of RBM_NORMAL mode atop XLogReadBufferExtended.  We are
                                598                 :      * careful to make the special space valid here so that tools like
                                599                 :      * pageinspect won't get confused.
                                600                 :      */
                                601              65 :     _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
                                602                 : 
  373 michael                   603              65 :     ovflopaque = HashPageGetOpaque(ovflpage);
                                604                 : 
 2195 rhaas                     605              65 :     ovflopaque->hasho_prevblkno = InvalidBlockNumber;
                                606              65 :     ovflopaque->hasho_nextblkno = InvalidBlockNumber;
  646 peter                     607              65 :     ovflopaque->hasho_bucket = InvalidBucket;
 2195 rhaas                     608              65 :     ovflopaque->hasho_flag = LH_UNUSED_PAGE;
                                609              65 :     ovflopaque->hasho_page_id = HASHO_PAGE_ID;
                                610                 : 
 2232                           611              65 :     MarkBufferDirty(ovflbuf);
                                612                 : 
                                613              65 :     if (BufferIsValid(prevbuf))
                                614                 :     {
                                615              65 :         Page        prevpage = BufferGetPage(prevbuf);
  373 michael                   616              65 :         HashPageOpaque prevopaque = HashPageGetOpaque(prevpage);
                                617                 : 
 2232 rhaas                     618              65 :         Assert(prevopaque->hasho_bucket == bucket);
                                619              65 :         prevopaque->hasho_nextblkno = nextblkno;
                                620              65 :         MarkBufferDirty(prevbuf);
                                621                 :     }
                                622              65 :     if (BufferIsValid(nextbuf))
                                623                 :     {
 2232 rhaas                     624 UBC           0 :         Page        nextpage = BufferGetPage(nextbuf);
  373 michael                   625               0 :         HashPageOpaque nextopaque = HashPageGetOpaque(nextpage);
                                626                 : 
 2232 rhaas                     627               0 :         Assert(nextopaque->hasho_bucket == bucket);
                                628               0 :         nextopaque->hasho_prevblkno = prevblkno;
                                629               0 :         MarkBufferDirty(nextbuf);
                                630                 :     }
                                631                 : 
                                632                 :     /* Clear the bitmap bit to indicate that this overflow page is free */
 2232 rhaas                     633 CBC          65 :     CLRBIT(freep, bitmapbit);
                                634              65 :     MarkBufferDirty(mapbuf);
                                635                 : 
                                636                 :     /* if this is now the first free page, update hashm_firstfree */
 7160 tgl                       637              65 :     if (ovflbitno < metap->hashm_firstfree)
                                638                 :     {
                                639              62 :         metap->hashm_firstfree = ovflbitno;
 2217 rhaas                     640              62 :         update_metap = true;
 2305                           641              62 :         MarkBufferDirty(metabuf);
                                642                 :     }
                                643                 : 
                                644                 :     /* XLOG stuff */
 2217                           645              65 :     if (RelationNeedsWAL(rel))
                                646                 :     {
                                647                 :         xl_hash_squeeze_page xlrec;
                                648                 :         XLogRecPtr  recptr;
                                649                 :         int         i;
                                650                 : 
                                651              65 :         xlrec.prevblkno = prevblkno;
                                652              65 :         xlrec.nextblkno = nextblkno;
                                653              65 :         xlrec.ntups = nitups;
                                654              65 :         xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf);
                                655              65 :         xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf);
                                656                 : 
                                657              65 :         XLogBeginInsert();
                                658              65 :         XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
                                659                 : 
                                660                 :         /*
                                661                 :          * bucket buffer needs to be registered to ensure that we can acquire
                                662                 :          * a cleanup lock on it during replay.
                                663                 :          */
                                664              65 :         if (!xlrec.is_prim_bucket_same_wrt)
                                665               3 :             XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
                                666                 : 
                                667              65 :         XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
                                668              65 :         if (xlrec.ntups > 0)
                                669                 :         {
                                670              29 :             XLogRegisterBufData(1, (char *) itup_offsets,
                                671                 :                                 nitups * sizeof(OffsetNumber));
                                672            1105 :             for (i = 0; i < nitups; i++)
                                673            1076 :                 XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
                                674                 :         }
                                675                 : 
                                676              65 :         XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD);
                                677                 : 
                                678                 :         /*
                                 679                 :          * If prevpage and the writepage (the block into which we are moving
                                 680                 :          * tuples from the overflow page) are the same, there is no need to
                                 681                 :          * register prevpage separately.  During replay, we can directly
                                 682                 :          * update the next block number in writepage.
                                683                 :          */
                                684              65 :         if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
                                685              39 :             XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD);
                                686                 : 
                                687              65 :         if (BufferIsValid(nextbuf))
 2217 rhaas                     688 UBC           0 :             XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD);
                                689                 : 
 2217 rhaas                     690 CBC          65 :         XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD);
                                691              65 :         XLogRegisterBufData(5, (char *) &bitmapbit, sizeof(uint32));
                                692                 : 
                                693              65 :         if (update_metap)
                                694                 :         {
                                695              62 :             XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD);
                                696              62 :             XLogRegisterBufData(6, (char *) &metap->hashm_firstfree, sizeof(uint32));
                                697                 :         }
                                698                 : 
                                699              65 :         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE);
                                700                 : 
                                701              65 :         PageSetLSN(BufferGetPage(wbuf), recptr);
                                702              65 :         PageSetLSN(BufferGetPage(ovflbuf), recptr);
                                703                 : 
                                704              65 :         if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt)
                                705              39 :             PageSetLSN(BufferGetPage(prevbuf), recptr);
                                706              65 :         if (BufferIsValid(nextbuf))
 2217 rhaas                     707 UBC           0 :             PageSetLSN(BufferGetPage(nextbuf), recptr);
                                708                 : 
 2217 rhaas                     709 CBC          65 :         PageSetLSN(BufferGetPage(mapbuf), recptr);
                                710                 : 
                                711              65 :         if (update_metap)
                                712              62 :             PageSetLSN(BufferGetPage(metabuf), recptr);
                                713                 :     }
                                714                 : 
                                715              65 :     END_CRIT_SECTION();
                                716                 : 
                                717                 :     /* release previous bucket if it is not same as write bucket */
 2232                           718              65 :     if (BufferIsValid(prevbuf) && prevblkno != writeblkno)
                                719              39 :         _hash_relbuf(rel, prevbuf);
                                720                 : 
                                721              65 :     if (BufferIsValid(ovflbuf))
                                722              65 :         _hash_relbuf(rel, ovflbuf);
                                723                 : 
                                724              65 :     if (BufferIsValid(nextbuf))
 2232 rhaas                     725 UBC           0 :         _hash_relbuf(rel, nextbuf);
                                726                 : 
 2232 rhaas                     727 CBC          65 :     _hash_relbuf(rel, mapbuf);
 2305                           728              65 :     _hash_relbuf(rel, metabuf);
                                729                 : 
 7160 tgl                       730              65 :     return nextblkno;
                                731                 : }
                                732                 : 
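The bitmap bookkeeping in _hash_freeovflpage above (clearing the page's bit with CLRBIT and, when appropriate, lowering hashm_firstfree) is ordinary word-and-bit arithmetic over an array of uint32 words. The standalone sketch below models that arithmetic with hypothetical helpers (clear_bit, first_zero_bit); it is a simplified illustration, not the actual PostgreSQL macros or page layout.

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_WORD   32          /* bits per uint32 bitmap word */
#define NWORDS          4           /* toy bitmap: 4 words = 128 bits */

/* Clear bit N in the bitmap, marking that overflow page "free". */
static void
clear_bit(uint32_t *map, unsigned n)
{
    map[n / BITS_PER_WORD] &= ~((uint32_t) 1 << (n % BITS_PER_WORD));
}

/* Return the index of the first zero (free) bit, or -1 if none. */
static int
first_zero_bit(const uint32_t *map, unsigned nbits)
{
    for (unsigned n = 0; n < nbits; n++)
        if (!(map[n / BITS_PER_WORD] & ((uint32_t) 1 << (n % BITS_PER_WORD))))
            return (int) n;
    return -1;
}

int
main(void)
{
    uint32_t map[NWORDS];

    /* start with every bit set, i.e. every overflow page "in use" */
    for (int i = 0; i < NWORDS; i++)
        map[i] = 0xFFFFFFFF;

    clear_bit(map, 37);             /* free overflow bit number 37 */
    printf("first free bit: %d\n", first_zero_bit(map, NWORDS * BITS_PER_WORD));
    return 0;
}

Running the sketch prints "first free bit: 37", mirroring how a cleared bit becomes the new candidate for hashm_firstfree when it precedes the old value.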
                                733                 : 
                                734                 : /*
                                735                 :  *  _hash_initbitmapbuffer()
                                736                 :  *
                                737                 :  *   Initialize a new bitmap page.  All bits in the new bitmap page are set to
                                738                 :  *   "1", indicating "in use".
                                739                 :  */
                                740                 : void
 2232 rhaas                     741             170 : _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage)
                                742                 : {
                                743                 :     Page        pg;
                                744                 :     HashPageOpaque op;
                                745                 :     uint32     *freep;
                                746                 : 
                                747             170 :     pg = BufferGetPage(buf);
                                748                 : 
                                749                 :     /* initialize the page */
                                750             170 :     if (initpage)
                                751              21 :         _hash_pageinit(pg, BufferGetPageSize(buf));
                                752                 : 
                                753                 :     /* initialize the page's special space */
  373 michael                   754             170 :     op = HashPageGetOpaque(pg);
 2232 rhaas                     755             170 :     op->hasho_prevblkno = InvalidBlockNumber;
                                756             170 :     op->hasho_nextblkno = InvalidBlockNumber;
  646 peter                     757             170 :     op->hasho_bucket = InvalidBucket;
 2232 rhaas                     758             170 :     op->hasho_flag = LH_BITMAP_PAGE;
                                759             170 :     op->hasho_page_id = HASHO_PAGE_ID;
                                760                 : 
                                761                 :     /* set all of the bits to 1 */
                                762             170 :     freep = HashPageGetBitmap(pg);
  282 peter                     763 GNC         170 :     memset(freep, 0xFF, bmsize);
                                764                 : 
                                765                 :     /*
                                766                 :      * Set pd_lower just past the end of the bitmap page data.  We could even
                                767                 :      * set pd_lower equal to pd_upper, but this is more precise and makes the
                                768                 :      * page look compressible to xlog.c.
                                769                 :      */
 2232 rhaas                     770 CBC         170 :     ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg;
                                771             170 : }
                                772                 : 
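For readers unfamiliar with the pd_lower trick at the end of _hash_initbitmapbuffer: the value stored is simply the byte offset, from the start of the page, of the first byte past the bitmap data. A minimal standalone sketch of that calculation, assuming a made-up fixed header size rather than the real PageHeaderData layout:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE    8192
#define HEADER_SIZE  24        /* assumed header size, for illustration only */

int
main(void)
{
    char     page[PAGE_SIZE];
    uint16_t bmsize = 4096;                 /* bytes of bitmap data */
    char    *freep = page + HEADER_SIZE;    /* bitmap starts after the header */

    memset(page, 0, sizeof(page));
    memset(freep, 0xFF, bmsize);            /* all bits set: every page "in use" */

    /* offset of the first byte past the bitmap, i.e. what pd_lower records */
    uint16_t pd_lower = (uint16_t) ((freep + bmsize) - page);

    printf("pd_lower = %u\n", (unsigned) pd_lower);   /* 24 + 4096 = 4120 */
    return 0;
}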
                                773                 : 
                                774                 : /*
                                775                 :  *  _hash_squeezebucket(rel, bucket)
                                776                 :  *
                                777                 :  *  Try to squeeze the tuples onto pages occurring earlier in the
                                778                 :  *  bucket chain in an attempt to free overflow pages. When we start
                                779                 :  *  the "squeezing", the page from which we start taking tuples (the
                                780                 :  *  "read" page) is the last bucket in the bucket chain and the page
                                781                 :  *  onto which we start squeezing tuples (the "write" page) is the
                                782                 :  *  first page in the bucket chain.  The read page works backward and
                                783                 :  *  the write page works forward; the procedure terminates when the
                                784                 :  *  read page and write page are the same page.
                                785                 :  *
                                786                 :  *  At completion of this procedure, it is guaranteed that all pages in
                                787                 :  *  the bucket are nonempty, unless the bucket is totally empty (in
                                 788                 :  *  which case all overflow pages will be freed).  The original implementation
                                 789                 :  *  required that to be true on entry as well, but it's a lot easier for
                                 790                 :  *  callers to leave empty overflow pages and let this function clean them up.
                                791                 :  *
                                 792                 :  *  Caller must acquire a cleanup lock on the primary page of the target
                                793                 :  *  bucket to exclude any scans that are in progress, which could easily
                                794                 :  *  be confused into returning the same tuple more than once or some tuples
                                 795                 :  *  not at all by the rearrangement we are performing here.  To prevent
                                 796                 :  *  any concurrent scan from crossing the squeeze scan, we use lock
                                 797                 :  *  chaining similar to hashbucketcleanup; see the comments atop that function.
                                798                 :  *
                                799                 :  *  We need to retain a pin on the primary bucket to ensure that no concurrent
                                800                 :  *  split can start.
                                801                 :  *
                                802                 :  *  Since this function is invoked in VACUUM, we provide an access strategy
                                803                 :  *  parameter that controls fetches of the bucket pages.
                                804                 :  */
                                805                 : void
 9770 scrappy                   806             684 : _hash_squeezebucket(Relation rel,
                                807                 :                     Bucket bucket,
                                808                 :                     BlockNumber bucket_blkno,
                                809                 :                     Buffer bucket_buf,
                                810                 :                     BufferAccessStrategy bstrategy)
                                811                 : {
                                812                 :     BlockNumber wblkno;
                                813                 :     BlockNumber rblkno;
                                814                 :     Buffer      wbuf;
                                815                 :     Buffer      rbuf;
                                816                 :     Page        wpage;
                                817                 :     Page        rpage;
                                818                 :     HashPageOpaque wopaque;
                                819                 :     HashPageOpaque ropaque;
                                820                 : 
                                821                 :     /*
                                822                 :      * start squeezing into the primary bucket page.
                                823                 :      */
 7159 tgl                       824             684 :     wblkno = bucket_blkno;
 2321 rhaas                     825             684 :     wbuf = bucket_buf;
 2545 kgrittn                   826             684 :     wpage = BufferGetPage(wbuf);
  373 michael                   827             684 :     wopaque = HashPageGetOpaque(wpage);
                                828                 : 
                                829                 :     /*
                                830                 :      * if there aren't any overflow pages, there's nothing to squeeze. caller
                                831                 :      * is responsible for releasing the pin on primary bucket page.
                                832                 :      */
 9345 bruce                     833             684 :     if (!BlockNumberIsValid(wopaque->hasho_nextblkno))
                                834                 :     {
 2298 rhaas                     835             655 :         LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
 9770 scrappy                   836             655 :         return;
                                837                 :     }
                                838                 : 
                                839                 :     /*
                                840                 :      * Find the last page in the bucket chain by starting at the base bucket
                                841                 :      * page and working forward.  Note: we assume that a hash bucket chain is
                                842                 :      * usually smaller than the buffer ring being used by VACUUM, else using
                                843                 :      * the access strategy here would be counterproductive.
                                844                 :      */
 4907 tgl                       845              29 :     rbuf = InvalidBuffer;
 9345 bruce                     846              29 :     ropaque = wopaque;
                                847                 :     do
                                848                 :     {
                                849             143 :         rblkno = ropaque->hasho_nextblkno;
 4907 tgl                       850             143 :         if (rbuf != InvalidBuffer)
 7157                           851             114 :             _hash_relbuf(rel, rbuf);
 5793                           852             143 :         rbuf = _hash_getbuf_with_strategy(rel,
                                853                 :                                           rblkno,
                                854                 :                                           HASH_WRITE,
                                855                 :                                           LH_OVERFLOW_PAGE,
                                856                 :                                           bstrategy);
 2545 kgrittn                   857             143 :         rpage = BufferGetPage(rbuf);
  373 michael                   858             143 :         ropaque = HashPageGetOpaque(rpage);
 9345 bruce                     859             143 :         Assert(ropaque->hasho_bucket == bucket);
                                860             143 :     } while (BlockNumberIsValid(ropaque->hasho_nextblkno));
                                861                 : 
                                862                 :     /*
                                863                 :      * squeeze the tuples.
                                864                 :      */
                                865                 :     for (;;)
                                866              39 :     {
                                867                 :         OffsetNumber roffnum;
                                868                 :         OffsetNumber maxroffnum;
                                869                 :         OffsetNumber deletable[MaxOffsetNumber];
                                870                 :         IndexTuple  itups[MaxIndexTuplesPerPage];
                                871                 :         Size        tups_size[MaxIndexTuplesPerPage];
                                872                 :         OffsetNumber itup_offsets[MaxIndexTuplesPerPage];
 2232 rhaas                     873              68 :         uint16      ndeletable = 0;
                                874              68 :         uint16      nitups = 0;
                                875              68 :         Size        all_tups_size = 0;
                                876                 :         int         i;
 2321                           877              68 :         bool        retain_pin = false;
                                878                 : 
 2232                           879              68 : readpage:
                                880                 :         /* Scan each tuple in "read" page */
 4907 tgl                       881              68 :         maxroffnum = PageGetMaxOffsetNumber(rpage);
                                882              68 :         for (roffnum = FirstOffsetNumber;
                                883            1144 :              roffnum <= maxroffnum;
                                884            1076 :              roffnum = OffsetNumberNext(roffnum))
                                885                 :         {
                                886                 :             IndexTuple  itup;
                                887                 :             Size        itemsz;
                                888                 : 
                                889                 :             /* skip dead tuples */
 2343 rhaas                     890            1079 :             if (ItemIdIsDead(PageGetItemId(rpage, roffnum)))
 2343 rhaas                     891 UBC           0 :                 continue;
                                892                 : 
 6283 tgl                       893 CBC        1079 :             itup = (IndexTuple) PageGetItem(rpage,
                                894                 :                                             PageGetItemId(rpage, roffnum));
 1866                           895            1079 :             itemsz = IndexTupleSize(itup);
 7157                           896            1079 :             itemsz = MAXALIGN(itemsz);
                                897                 : 
                                898                 :             /*
                                899                 :              * Walk up the bucket chain, looking for a page big enough for
                                900                 :              * this item and all other accumulated items.  Exit if we reach
                                901                 :              * the read page.
                                902                 :              */
 2232 rhaas                     903            1154 :             while (PageGetFreeSpaceForMultipleTuples(wpage, nitups + 1) < (all_tups_size + itemsz))
                                904                 :             {
 2321                           905              78 :                 Buffer      next_wbuf = InvalidBuffer;
 2232                           906              78 :                 bool        tups_moved = false;
                                907                 : 
 7157 tgl                       908              78 :                 Assert(!PageIsEmpty(wpage));
                                909                 : 
 2321 rhaas                     910              78 :                 if (wblkno == bucket_blkno)
                                911               6 :                     retain_pin = true;
                                912                 : 
 7157 tgl                       913              78 :                 wblkno = wopaque->hasho_nextblkno;
                                914              78 :                 Assert(BlockNumberIsValid(wblkno));
                                915                 : 
                                916                 :                 /* don't need to move to next page if we reached the read page */
 2321 rhaas                     917              78 :                 if (wblkno != rblkno)
                                918              75 :                     next_wbuf = _hash_getbuf_with_strategy(rel,
                                919                 :                                                            wblkno,
                                920                 :                                                            HASH_WRITE,
                                921                 :                                                            LH_OVERFLOW_PAGE,
                                922                 :                                                            bstrategy);
                                923                 : 
 2232                           924              78 :                 if (nitups > 0)
                                925                 :                 {
 2232 rhaas                     926 UBC           0 :                     Assert(nitups == ndeletable);
                                927                 : 
                                928                 :                     /*
                                929                 :                      * This operation needs to log multiple tuples, prepare
                                930                 :                      * WAL for that.
                                931                 :                      */
 2217                           932               0 :                     if (RelationNeedsWAL(rel))
                                933               0 :                         XLogEnsureRecordSpace(0, 3 + nitups);
                                934                 : 
                                935               0 :                     START_CRIT_SECTION();
                                936                 : 
                                937                 :                     /*
                                938                 :                      * we have to insert tuples on the "write" page, being
                                939                 :                      * careful to preserve hashkey ordering.  (If we insert
                                940                 :                      * many tuples into the same "write" page it would be
                                941                 :                      * worth qsort'ing them).
                                942                 :                      */
 2232                           943               0 :                     _hash_pgaddmultitup(rel, wbuf, itups, itup_offsets, nitups);
                                944               0 :                     MarkBufferDirty(wbuf);
                                945                 : 
                                946                 :                     /* Delete tuples we already moved off read page */
                                947               0 :                     PageIndexMultiDelete(rpage, deletable, ndeletable);
                                948               0 :                     MarkBufferDirty(rbuf);
                                949                 : 
                                950                 :                     /* XLOG stuff */
 2217                           951               0 :                     if (RelationNeedsWAL(rel))
                                952                 :                     {
                                953                 :                         XLogRecPtr  recptr;
                                954                 :                         xl_hash_move_page_contents xlrec;
                                955                 : 
                                956               0 :                         xlrec.ntups = nitups;
  578 michael                   957               0 :                         xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf);
                                958                 : 
 2217 rhaas                     959               0 :                         XLogBeginInsert();
                                960               0 :                         XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
                                961                 : 
                                962                 :                         /*
                                963                 :                          * bucket buffer needs to be registered to ensure that
                                964                 :                          * we can acquire a cleanup lock on it during replay.
                                965                 :                          */
                                966               0 :                         if (!xlrec.is_prim_bucket_same_wrt)
                                967               0 :                             XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
                                968                 : 
                                969               0 :                         XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
                                970               0 :                         XLogRegisterBufData(1, (char *) itup_offsets,
                                971                 :                                             nitups * sizeof(OffsetNumber));
                                972               0 :                         for (i = 0; i < nitups; i++)
                                973               0 :                             XLogRegisterBufData(1, (char *) itups[i], tups_size[i]);
                                974                 : 
                                975               0 :                         XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
                                976               0 :                         XLogRegisterBufData(2, (char *) deletable,
                                977                 :                                             ndeletable * sizeof(OffsetNumber));
                                978                 : 
                                979               0 :                         recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS);
                                980                 : 
                                981               0 :                         PageSetLSN(BufferGetPage(wbuf), recptr);
                                982               0 :                         PageSetLSN(BufferGetPage(rbuf), recptr);
                                983                 :                     }
                                984                 : 
                                985               0 :                     END_CRIT_SECTION();
                                986                 : 
 2232                           987               0 :                     tups_moved = true;
                                988                 :                 }
                                989                 : 
                                990                 :                 /*
                                991                 :                  * release the lock on previous page after acquiring the lock
                                992                 :                  * on next page
                                993                 :                  */
 2305 rhaas                     994 CBC          78 :                 if (retain_pin)
 2298                           995               6 :                     LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
                                996                 :                 else
 4907 tgl                       997              72 :                     _hash_relbuf(rel, wbuf);
                                998                 : 
                                999                 :                 /* nothing more to do if we reached the read page */
 7157                          1000              78 :                 if (rblkno == wblkno)
                               1001                 :                 {
 2305 rhaas                    1002               3 :                     _hash_relbuf(rel, rbuf);
 7157 tgl                      1003              29 :                     return;
                               1004                 :                 }
                               1005                 : 
 2321 rhaas                    1006              75 :                 wbuf = next_wbuf;
 2545 kgrittn                  1007              75 :                 wpage = BufferGetPage(wbuf);
  373 michael                  1008              75 :                 wopaque = HashPageGetOpaque(wpage);
 7157 tgl                      1009              75 :                 Assert(wopaque->hasho_bucket == bucket);
 2321 rhaas                    1010              75 :                 retain_pin = false;
                               1011                 : 
                               1012                 :                 /* be tidy */
 2232                          1013              75 :                 for (i = 0; i < nitups; i++)
 2232 rhaas                    1014 UBC           0 :                     pfree(itups[i]);
 2232 rhaas                    1015 CBC          75 :                 nitups = 0;
                               1016              75 :                 all_tups_size = 0;
                               1017              75 :                 ndeletable = 0;
                               1018                 : 
                               1019                 :                 /*
                               1020                 :                  * after moving the tuples, rpage would have been compacted,
                               1021                 :                  * so we need to rescan it.
                               1022                 :                  */
                               1023              75 :                 if (tups_moved)
 2232 rhaas                    1024 UBC           0 :                     goto readpage;
                               1025                 :             }
                               1026                 : 
                               1027                 :             /* remember tuple for deletion from "read" page */
 4907 tgl                      1028 CBC        1076 :             deletable[ndeletable++] = roffnum;
                               1029                 : 
                               1030                 :             /*
                                1031                 :              * We need a copy of the index tuples, as they can be freed when
                                1032                 :              * the overflow page is freed; we still need them to write the WAL
                                1033                 :              * record in _hash_freeovflpage.
                               1034                 :              */
 2232 rhaas                    1035            1076 :             itups[nitups] = CopyIndexTuple(itup);
                               1036            1076 :             tups_size[nitups++] = itemsz;
                               1037            1076 :             all_tups_size += itemsz;
                               1038                 :         }
                               1039                 : 
                               1040                 :         /*
                               1041                 :          * If we reach here, there are no live tuples on the "read" page ---
                               1042                 :          * it was empty when we got to it, or we moved them all.  So we can
                               1043                 :          * just free the page without bothering with deleting tuples
                               1044                 :          * individually.  Then advance to the previous "read" page.
                               1045                 :          *
                               1046                 :          * Tricky point here: if our read and write pages are adjacent in the
                               1047                 :          * bucket chain, our write lock on wbuf will conflict with
                               1048                 :          * _hash_freeovflpage's attempt to update the sibling links of the
                               1049                 :          * removed page.  In that case, we don't need to lock it again.
                               1050                 :          */
 4907 tgl                      1051              65 :         rblkno = ropaque->hasho_prevblkno;
                               1052              65 :         Assert(BlockNumberIsValid(rblkno));
                               1053                 : 
                               1054                 :         /* free this overflow page (releases rbuf) */
 2232 rhaas                    1055              65 :         _hash_freeovflpage(rel, bucket_buf, rbuf, wbuf, itups, itup_offsets,
                               1056                 :                            tups_size, nitups, bstrategy);
                               1057                 : 
                               1058                 :         /* be tidy */
                               1059            1141 :         for (i = 0; i < nitups; i++)
                               1060            1076 :             pfree(itups[i]);
                               1061                 : 
                               1062                 :         /* are we freeing the page adjacent to wbuf? */
 4907 tgl                      1063              65 :         if (rblkno == wblkno)
                               1064                 :         {
                               1065                 :             /* retain the pin on primary bucket page till end of bucket scan */
 2302 rhaas                    1066              26 :             if (wblkno == bucket_blkno)
 2298                          1067              23 :                 LockBuffer(wbuf, BUFFER_LOCK_UNLOCK);
                               1068                 :             else
 2302                          1069               3 :                 _hash_relbuf(rel, wbuf);
 4907 tgl                      1070              26 :             return;
                               1071                 :         }
                               1072                 : 
                               1073              39 :         rbuf = _hash_getbuf_with_strategy(rel,
                               1074                 :                                           rblkno,
                               1075                 :                                           HASH_WRITE,
                               1076                 :                                           LH_OVERFLOW_PAGE,
                               1077                 :                                           bstrategy);
 2545 kgrittn                  1078              39 :         rpage = BufferGetPage(rbuf);
  373 michael                  1079              39 :         ropaque = HashPageGetOpaque(rpage);
 4907 tgl                      1080              39 :         Assert(ropaque->hasho_bucket == bucket);
                               1081                 :     }
                               1082                 : 
                               1083                 :     /* NOTREACHED */
                               1084                 : }
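To visualize the read/write cursor dance in _hash_squeezebucket, here is a standalone toy model: a doubly-linked chain of fixed-capacity pages is squeezed by copying tuples from a read cursor that starts at the tail and walks backward into a write cursor that starts at the head and walks forward, freeing emptied tail pages until the two cursors meet. Every name here (ToyPage, squeeze_chain, PAGE_CAPACITY) is invented for illustration, and the sketch deliberately ignores locking, WAL logging, hashkey ordering, and the free-space bitmap.

#include <stdio.h>
#include <stdlib.h>

#define PAGE_CAPACITY 4

typedef struct ToyPage
{
    int             ntups;
    int             tuples[PAGE_CAPACITY];
    struct ToyPage *prev;
    struct ToyPage *next;
} ToyPage;

/* Append a new page holding "ntups" dummy tuples to the end of the chain. */
static ToyPage *
add_page(ToyPage *tail, int ntups)
{
    ToyPage *p = calloc(1, sizeof(ToyPage));

    p->ntups = ntups;
    for (int i = 0; i < ntups; i++)
        p->tuples[i] = i;
    p->prev = tail;
    if (tail)
        tail->next = p;
    return p;
}

/*
 * Move tuples from the last page toward the first, freeing emptied tail
 * pages, until the read and write cursors meet.
 */
static void
squeeze_chain(ToyPage *head)
{
    ToyPage *wpage = head;
    ToyPage *rpage = head;

    while (rpage->next)
        rpage = rpage->next;            /* find the last page in the chain */

    while (wpage != rpage)
    {
        /* move tuples while the write page has room and the read page has any */
        while (rpage->ntups > 0 && wpage->ntups < PAGE_CAPACITY)
            wpage->tuples[wpage->ntups++] = rpage->tuples[--rpage->ntups];

        if (rpage->ntups == 0)
        {
            /* read page is empty: unlink and free it, then step backward */
            ToyPage *dead = rpage;

            rpage = rpage->prev;
            rpage->next = NULL;
            free(dead);
        }
        else
        {
            /* write page is full: step forward to the next page */
            wpage = wpage->next;
        }
    }
}

int
main(void)
{
    ToyPage *head = add_page(NULL, 4);  /* primary page, already full */
    ToyPage *tail = head;

    tail = add_page(tail, 1);
    tail = add_page(tail, 2);
    tail = add_page(tail, 1);           /* sparsely filled overflow pages */

    squeeze_chain(head);

    for (ToyPage *p = head; p; p = p->next)
        printf("page holds %d tuple(s)\n", p->ntups);
    return 0;
}

With the sample chain above (4 + 1 + 2 + 1 tuples), the sketch ends with two full pages, illustrating how squeezing shortens the chain without losing tuples.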
        

Generated by: LCOV version v1.16-55-g56c0a2a