LCOV - differential code coverage report
Current view: top level - src/backend/access/gin - ginget.c (source / functions) Coverage Total Hit UBC GNC CBC DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 83.1 % 632 525 107 2 523 2
Current Date: 2023-04-08 15:15:32 Functions: 93.8 % 16 15 1 1 14
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * ginget.c
       4                 :  *    fetch tuples from a GIN scan.
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *          src/backend/access/gin/ginget.c
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : 
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/gin_private.h"
      18                 : #include "access/relscan.h"
      19                 : #include "common/pg_prng.h"
      20                 : #include "miscadmin.h"
      21                 : #include "storage/predicate.h"
      22                 : #include "utils/datum.h"
      23                 : #include "utils/memutils.h"
      24                 : #include "utils/rel.h"
      25                 : 
      26                 : /* GUC parameter */
      27                 : int         GinFuzzySearchLimit = 0;
      28                 : 
      29                 : typedef struct pendingPosition
      30                 : {
      31                 :     Buffer      pendingBuffer;
      32                 :     OffsetNumber firstOffset;
      33                 :     OffsetNumber lastOffset;
      34                 :     ItemPointerData item;
      35                 :     bool       *hasMatchKey;
      36                 : } pendingPosition;
      37                 : 
      38                 : 
      39                 : /*
      40                 :  * Goes to the next page if current offset is outside of bounds
      41                 :  */
      42                 : static bool
      43 CBC      203804 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
      44                 : {
      45          203804 :     Page        page = BufferGetPage(stack->buffer);
      46                 : 
      47          203804 :     if (stack->off > PageGetMaxOffsetNumber(page))
      48                 :     {
      49                 :         /*
      50                 :          * We scanned the whole page, so we should take right page
      51                 :          */
      52            1895 :         if (GinPageRightMost(page))
      53             183 :             return false;       /* no more pages */
      54                 : 
      55            1712 :         stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
      56            1712 :         stack->blkno = BufferGetBlockNumber(stack->buffer);
      57            1712 :         stack->off = FirstOffsetNumber;
      58            1712 :         PredicateLockPage(btree->index, stack->blkno, snapshot);
      59                 :     }
      60                 : 
      61          203621 :     return true;
      62                 : }
      63                 : 
      64                 : /*
      65                 :  * Scan all pages of a posting tree and save all its heap ItemPointers
      66                 :  * in scanEntry->matchBitmap
      67                 :  */
      68                 : static void
      69               6 : scanPostingTree(Relation index, GinScanEntry scanEntry,
      70                 :                 BlockNumber rootPostingTree, Snapshot snapshot)
      71                 : {
      72                 :     GinBtreeData btree;
      73                 :     GinBtreeStack *stack;
      74                 :     Buffer      buffer;
      75                 :     Page        page;
      76                 : 
      77                 :     /* Descend to the leftmost leaf page */
      78               6 :     stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
      79               6 :     buffer = stack->buffer;
      80                 : 
      81               6 :     IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
      82                 : 
      83               6 :     freeGinBtreeStack(stack);
      84                 : 
      85                 :     /*
      86                 :      * Loop iterates through all leaf pages of posting tree
      87                 :      */
      88                 :     for (;;)
      89                 :     {
      90              15 :         page = BufferGetPage(buffer);
      91              15 :         if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
      92                 :         {
      93              15 :             int         n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
      94                 : 
      95              15 :             scanEntry->predictNumberResult += n;
      96                 :         }
      97                 : 
      98              15 :         if (GinPageRightMost(page))
      99               6 :             break;              /* no more pages */
     100                 : 
     101               9 :         buffer = ginStepRight(buffer, index, GIN_SHARE);
     102                 :     }
     103                 : 
     104               6 :     UnlockReleaseBuffer(buffer);
     105               6 : }
     106                 : 
     107                 : /*
     108                 :  * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
     109                 :  * match the search entry.  This supports three different match modes:
     110                 :  *
     111                 :  * 1. Partial-match support: scan from current point until the
     112                 :  *    comparePartialFn says we're done.
     113                 :  * 2. SEARCH_MODE_ALL: scan from current point (which should be first
     114                 :  *    key for the current attnum) until we hit null items or end of attnum
     115                 :  * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
     116                 :  *    key for the current attnum) until we hit end of attnum
     117                 :  *
     118                 :  * Returns true if done, false if it's necessary to restart scan from scratch
     119                 :  */
     120                 : static bool
     121             267 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
     122                 :                    GinScanEntry scanEntry, Snapshot snapshot)
     123                 : {
     124                 :     OffsetNumber attnum;
     125                 :     Form_pg_attribute attr;
     126                 : 
     127                 :     /* Initialize empty bitmap result */
     128             267 :     scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
     129                 : 
     130                 :     /* Null query cannot partial-match anything */
     131             267 :     if (scanEntry->isPartialMatch &&
     132             126 :         scanEntry->queryCategory != GIN_CAT_NORM_KEY)
     133 UBC           0 :         return true;
     134                 : 
     135                 :     /* Locate tupdesc entry for key column (for attbyval/attlen data) */
     136 CBC         267 :     attnum = scanEntry->attnum;
     137             267 :     attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
     138                 : 
     139                 :     /*
     140                 :      * Predicate lock entry leaf page, following pages will be locked by
     141                 :      * moveRightIfItNeeded()
     142                 :      */
     143             267 :     PredicateLockPage(btree->index, stack->buffer, snapshot);
     144                 : 
     145                 :     for (;;)
     146          203531 :     {
     147                 :         Page        page;
     148                 :         IndexTuple  itup;
     149                 :         Datum       idatum;
     150                 :         GinNullCategory icategory;
     151                 : 
     152                 :         /*
     153                 :          * stack->off points to the interested entry, buffer is already locked
     154                 :          */
     155          203798 :         if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     156             267 :             return true;
     157                 : 
     158          203615 :         page = BufferGetPage(stack->buffer);
     159          203615 :         TestForOldSnapshot(snapshot, btree->index, page);
     160          203615 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     161                 : 
     162                 :         /*
     163                 :          * If tuple stores another attribute then stop scan
     164                 :          */
     165          203615 :         if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
     166 UBC           0 :             return true;
     167                 : 
     168                 :         /* Safe to fetch attribute value */
     169 CBC      203615 :         idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
     170                 : 
     171                 :         /*
     172                 :          * Check for appropriate scan stop conditions
     173                 :          */
     174          203615 :         if (scanEntry->isPartialMatch)
     175                 :         {
     176                 :             int32       cmp;
     177                 : 
     178                 :             /*
     179                 :              * In partial match, stop scan at any null (including
     180                 :              * placeholders); partial matches never match nulls
     181                 :              */
     182             662 :             if (icategory != GIN_CAT_NORM_KEY)
     183               5 :                 return true;
     184                 : 
     185                 :             /*----------
     186                 :              * Check of partial match.
     187                 :              * case cmp == 0 => match
     188                 :              * case cmp > 0 => not match and finish scan
     189                 :              * case cmp < 0 => not match and continue scan
     190                 :              *----------
     191                 :              */
     192             657 :             cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
     193             657 :                                                   btree->ginstate->supportCollation[attnum - 1],
     194                 :                                                   scanEntry->queryKey,
     195                 :                                                   idatum,
     196             657 :                                                   UInt16GetDatum(scanEntry->strategy),
     197             657 :                                                   PointerGetDatum(scanEntry->extra_data)));
     198                 : 
     199             657 :             if (cmp > 0)
     200              65 :                 return true;
     201             592 :             else if (cmp < 0)
     202                 :             {
     203              30 :                 stack->off++;
     204              30 :                 continue;
     205                 :             }
     206                 :         }
     207          202953 :         else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
     208                 :         {
     209                 :             /*
     210                 :              * In ALL mode, we are not interested in null items, so we can
     211                 :              * stop if we get to a null-item placeholder (which will be the
     212                 :              * last entry for a given attnum).  We do want to include NULL_KEY
     213                 :              * and EMPTY_ITEM entries, though.
     214                 :              */
     215          202953 :             if (icategory == GIN_CAT_NULL_ITEM)
     216              14 :                 return true;
     217                 :         }
     218                 : 
     219                 :         /*
     220                 :          * OK, we want to return the TIDs listed in this entry.
     221                 :          */
     222          203501 :         if (GinIsPostingTree(itup))
     223                 :         {
     224               6 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     225                 : 
     226                 :             /*
     227                 :              * We should unlock current page (but not unpin) during tree scan
     228                 :              * to prevent deadlock with vacuum processes.
     229                 :              *
     230                 :              * We save current entry value (idatum) to be able to re-find our
     231                 :              * tuple after re-locking
     232                 :              */
     233               6 :             if (icategory == GIN_CAT_NORM_KEY)
     234               6 :                 idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
     235                 : 
     236               6 :             LockBuffer(stack->buffer, GIN_UNLOCK);
     237                 : 
     238                 :             /*
     239                 :              * Acquire predicate lock on the posting tree.  We already hold a
     240                 :              * lock on the entry page, but insertions to the posting tree
     241                 :              * don't check for conflicts on that level.
     242                 :              */
     243               6 :             PredicateLockPage(btree->index, rootPostingTree, snapshot);
     244                 : 
     245                 :             /* Collect all the TIDs in this entry's posting tree */
     246               6 :             scanPostingTree(btree->index, scanEntry, rootPostingTree,
     247                 :                             snapshot);
     248                 : 
     249                 :             /*
     250                 :              * We lock again the entry page and while it was unlocked insert
     251                 :              * might have occurred, so we need to re-find our position.
     252                 :              */
     253               6 :             LockBuffer(stack->buffer, GIN_SHARE);
     254               6 :             page = BufferGetPage(stack->buffer);
     255               6 :             if (!GinPageIsLeaf(page))
     256                 :             {
     257                 :                 /*
     258                 :                  * Root page becomes non-leaf while we unlock it. We will
     259                 :                  * start again, this situation doesn't occur often - root can
     260                 :                  * became a non-leaf only once per life of index.
     261                 :                  */
     262 UBC           0 :                 return false;
     263                 :             }
     264                 : 
     265                 :             /* Search forward to re-find idatum */
     266                 :             for (;;)
     267                 :             {
     268 CBC           6 :                 if (moveRightIfItNeeded(btree, stack, snapshot) == false)
     269 UBC           0 :                     ereport(ERROR,
     270                 :                             (errcode(ERRCODE_INTERNAL_ERROR),
     271                 :                              errmsg("failed to re-find tuple within index \"%s\"",
     272                 :                                     RelationGetRelationName(btree->index))));
     273                 : 
     274 CBC           6 :                 page = BufferGetPage(stack->buffer);
     275               6 :                 itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
     276                 : 
     277               6 :                 if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
     278                 :                 {
     279                 :                     Datum       newDatum;
     280                 :                     GinNullCategory newCategory;
     281                 : 
     282               6 :                     newDatum = gintuple_get_key(btree->ginstate, itup,
     283                 :                                                 &newCategory);
     284                 : 
     285               6 :                     if (ginCompareEntries(btree->ginstate, attnum,
     286                 :                                           newDatum, newCategory,
     287                 :                                           idatum, icategory) == 0)
     288               6 :                         break;  /* Found! */
     289                 :                 }
     290                 : 
     291 UBC           0 :                 stack->off++;
     292                 :             }
     293                 : 
     294 CBC           6 :             if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
     295 UBC           0 :                 pfree(DatumGetPointer(idatum));
     296                 :         }
     297                 :         else
     298                 :         {
     299                 :             ItemPointer ipd;
     300                 :             int         nipd;
     301                 : 
     302 CBC      203495 :             ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
     303          203495 :             tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
     304          203495 :             scanEntry->predictNumberResult += GinGetNPosting(itup);
     305          203495 :             pfree(ipd);
     306                 :         }
     307                 : 
     308                 :         /*
     309                 :          * Done with this entry, go to the next
     310                 :          */
     311          203501 :         stack->off++;
     312                 :     }
     313                 : }
     314                 : 
     315                 : /*
     316                 :  * Start* functions setup beginning state of searches: finds correct buffer and pins it.
     317                 :  */
     318                 : static void
     319            1846 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
     320                 : {
     321                 :     GinBtreeData btreeEntry;
     322                 :     GinBtreeStack *stackEntry;
     323                 :     Page        page;
     324                 :     bool        needUnlock;
     325                 : 
     326            1846 : restartScanEntry:
     327            1846 :     entry->buffer = InvalidBuffer;
     328            1846 :     ItemPointerSetMin(&entry->curItem);
     329            1846 :     entry->offset = InvalidOffsetNumber;
     330            1846 :     if (entry->list)
     331 UBC           0 :         pfree(entry->list);
     332 CBC        1846 :     entry->list = NULL;
     333            1846 :     entry->nlist = 0;
     334            1846 :     entry->matchBitmap = NULL;
     335            1846 :     entry->matchResult = NULL;
     336            1846 :     entry->reduceResult = false;
     337            1846 :     entry->predictNumberResult = 0;
     338                 : 
     339                 :     /*
     340                 :      * we should find entry, and begin scan of posting tree or just store
     341                 :      * posting list in memory
     342                 :      */
     343            1846 :     ginPrepareEntryScan(&btreeEntry, entry->attnum,
     344            1846 :                         entry->queryKey, entry->queryCategory,
     345                 :                         ginstate);
     346            1846 :     stackEntry = ginFindLeafPage(&btreeEntry, true, false, snapshot);
     347            1846 :     page = BufferGetPage(stackEntry->buffer);
     348                 : 
     349                 :     /* ginFindLeafPage() will have already checked snapshot age. */
     350            1846 :     needUnlock = true;
     351                 : 
     352            1846 :     entry->isFinished = true;
     353                 : 
     354            1846 :     if (entry->isPartialMatch ||
     355            1720 :         entry->queryCategory == GIN_CAT_EMPTY_QUERY)
     356                 :     {
     357                 :         /*
     358                 :          * btreeEntry.findItem locates the first item >= given search key.
     359                 :          * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
     360                 :          * because of the way the GIN_CAT_EMPTY_QUERY category code is
     361                 :          * assigned.)  We scan forward from there and collect all TIDs needed
     362                 :          * for the entry type.
     363                 :          */
     364             267 :         btreeEntry.findItem(&btreeEntry, stackEntry);
     365             267 :         if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
     366             267 :             == false)
     367                 :         {
     368                 :             /*
     369                 :              * GIN tree was seriously restructured, so we will cleanup all
     370                 :              * found data and rescan. See comments near 'return false' in
     371                 :              * collectMatchBitmap()
     372                 :              */
     373 UBC           0 :             if (entry->matchBitmap)
     374                 :             {
     375               0 :                 if (entry->matchIterator)
     376               0 :                     tbm_end_iterate(entry->matchIterator);
     377               0 :                 entry->matchIterator = NULL;
     378               0 :                 tbm_free(entry->matchBitmap);
     379               0 :                 entry->matchBitmap = NULL;
     380                 :             }
     381               0 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     382               0 :             freeGinBtreeStack(stackEntry);
     383               0 :             goto restartScanEntry;
     384                 :         }
     385                 : 
     386 CBC         267 :         if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
     387                 :         {
     388             210 :             entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
     389             210 :             entry->isFinished = false;
     390                 :         }
     391                 :     }
     392            1579 :     else if (btreeEntry.findItem(&btreeEntry, stackEntry))
     393                 :     {
     394            1089 :         IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
     395                 : 
     396            1089 :         if (GinIsPostingTree(itup))
     397                 :         {
     398              32 :             BlockNumber rootPostingTree = GinGetPostingTree(itup);
     399                 :             GinBtreeStack *stack;
     400                 :             Page        entrypage;
     401                 :             ItemPointerData minItem;
     402                 : 
     403                 :             /*
     404                 :              * This is an equality scan, so lock the root of the posting tree.
     405                 :              * It represents a lock on the exact key value, and covers all the
     406                 :              * items in the posting tree.
     407                 :              */
     408              32 :             PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
     409                 : 
     410                 :             /*
     411                 :              * We should unlock entry page before touching posting tree to
     412                 :              * prevent deadlocks with vacuum processes. Because entry is never
     413                 :              * deleted from page and posting tree is never reduced to the
     414                 :              * posting list, we can unlock page after getting BlockNumber of
     415                 :              * root of posting tree.
     416                 :              */
     417              32 :             LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     418              32 :             needUnlock = false;
     419                 : 
     420              32 :             stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
     421                 :                                             rootPostingTree, snapshot);
     422              32 :             entry->buffer = stack->buffer;
     423                 : 
     424                 :             /*
     425                 :              * We keep buffer pinned because we need to prevent deletion of
     426                 :              * page during scan. See GIN's vacuum implementation. RefCount is
     427                 :              * increased to keep buffer pinned after freeGinBtreeStack() call.
     428                 :              */
     429              32 :             IncrBufferRefCount(entry->buffer);
     430                 : 
     431 GNC          32 :             entrypage = BufferGetPage(entry->buffer);
     432                 : 
     433                 :             /*
     434                 :              * Load the first page into memory.
     435                 :              */
     436 CBC          32 :             ItemPointerSetMin(&minItem);
     437 GNC          32 :             entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
     438                 : 
     439 CBC          32 :             entry->predictNumberResult = stack->predictNumber * entry->nlist;
     440                 : 
     441              32 :             LockBuffer(entry->buffer, GIN_UNLOCK);
     442              32 :             freeGinBtreeStack(stack);
     443              32 :             entry->isFinished = false;
     444                 :         }
     445                 :         else
     446                 :         {
     447                 :             /*
     448                 :              * Lock the entry leaf page.  This is more coarse-grained than
     449                 :              * necessary, because it will conflict with any insertions that
     450                 :              * land on the same leaf page, not only the exact key we searched
     451                 :              * for.  But locking an individual tuple would require updating
     452                 :              * that lock whenever it moves because of insertions or vacuums,
     453                 :              * which seems too complicated.
     454                 :              */
     455            1057 :             PredicateLockPage(ginstate->index,
     456                 :                               BufferGetBlockNumber(stackEntry->buffer),
     457                 :                               snapshot);
     458            1057 :             if (GinGetNPosting(itup) > 0)
     459                 :             {
     460            1054 :                 entry->list = ginReadTuple(ginstate, entry->attnum, itup,
     461                 :                                            &entry->nlist);
     462            1054 :                 entry->predictNumberResult = entry->nlist;
     463                 : 
     464            1054 :                 entry->isFinished = false;
     465                 :             }
     466                 :         }
     467                 :     }
     468                 :     else
     469                 :     {
     470                 :         /*
     471                 :          * No entry found.  Predicate lock the leaf page, to lock the place
     472                 :          * where the entry would've been, had there been one.
     473                 :          */
     474             490 :         PredicateLockPage(ginstate->index,
     475                 :                           BufferGetBlockNumber(stackEntry->buffer), snapshot);
     476                 :     }
     477                 : 
     478            1846 :     if (needUnlock)
     479            1814 :         LockBuffer(stackEntry->buffer, GIN_UNLOCK);
     480            1846 :     freeGinBtreeStack(stackEntry);
     481            1846 : }
     482                 : 
     483                 : /*
     484                 :  * Comparison function for scan entry indexes. Sorts by predictNumberResult,
     485                 :  * least frequent items first.
     486                 :  */
     487                 : static int
     488            1737 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
     489                 : {
     490            1737 :     const GinScanKey key = (const GinScanKey) arg;
     491            1737 :     int         i1 = *(const int *) a1;
     492            1737 :     int         i2 = *(const int *) a2;
     493            1737 :     uint32      n1 = key->scanEntry[i1]->predictNumberResult;
     494            1737 :     uint32      n2 = key->scanEntry[i2]->predictNumberResult;
     495                 : 
     496            1737 :     if (n1 < n2)
     497             317 :         return -1;
     498            1420 :     else if (n1 == n2)
     499             558 :         return 0;
     500                 :     else
     501             862 :         return 1;
     502                 : }
     503                 : 
     504                 : static void
     505             852 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
     506                 : {
     507             852 :     MemoryContext oldCtx = CurrentMemoryContext;
     508                 :     int         i;
     509                 :     int         j;
     510                 :     int        *entryIndexes;
     511                 : 
     512             852 :     ItemPointerSetMin(&key->curItem);
     513             852 :     key->curItemMatches = false;
     514             852 :     key->recheckCurItem = false;
     515             852 :     key->isFinished = false;
     516                 : 
     517                 :     /*
     518                 :      * Divide the entries into two distinct sets: required and additional.
     519                 :      * Additional entries are not enough for a match alone, without any items
     520                 :      * from the required set, but are needed by the consistent function to
     521                 :      * decide if an item matches. When scanning, we can skip over items from
     522                 :      * additional entries that have no corresponding matches in any of the
     523                 :      * required entries. That speeds up queries like "frequent & rare"
     524                 :      * considerably, if the frequent term can be put in the additional set.
     525                 :      *
     526                 :      * There can be many legal ways to divide them entries into these two
     527                 :      * sets. A conservative division is to just put everything in the required
     528                 :      * set, but the more you can put in the additional set, the more you can
     529                 :      * skip during the scan. To maximize skipping, we try to put as many
     530                 :      * frequent items as possible into additional, and less frequent ones into
     531                 :      * required. To do that, sort the entries by frequency
     532                 :      * (predictNumberResult), and put entries into the required set in that
     533                 :      * order, until the consistent function says that none of the remaining
     534                 :      * entries can form a match, without any items from the required set. The
     535                 :      * rest go to the additional set.
     536                 :      *
     537                 :      * Exclude-only scan keys are known to have no required entries.
     538                 :      */
     539             852 :     if (key->excludeOnly)
     540                 :     {
     541              19 :         MemoryContextSwitchTo(so->keyCtx);
     542                 : 
     543              19 :         key->nrequired = 0;
     544              19 :         key->nadditional = key->nentries;
     545              19 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     546              22 :         for (i = 0; i < key->nadditional; i++)
     547               3 :             key->additionalEntries[i] = key->scanEntry[i];
     548                 :     }
     549             833 :     else if (key->nentries > 1)
     550                 :     {
     551             275 :         MemoryContextSwitchTo(so->tempCtx);
     552                 : 
     553             275 :         entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
     554            1560 :         for (i = 0; i < key->nentries; i++)
     555            1285 :             entryIndexes[i] = i;
     556             275 :         qsort_arg(entryIndexes, key->nentries, sizeof(int),
     557                 :                   entryIndexByFrequencyCmp, key);
     558                 : 
     559             917 :         for (i = 0; i < key->nentries - 1; i++)
     560                 :         {
     561                 :             /* Pass all entries <= i as FALSE, and the rest as MAYBE */
     562           34779 :             for (j = 0; j <= i; j++)
     563           33928 :                 key->entryRes[entryIndexes[j]] = GIN_FALSE;
     564           35183 :             for (j = i + 1; j < key->nentries; j++)
     565           34332 :                 key->entryRes[entryIndexes[j]] = GIN_MAYBE;
     566                 : 
     567             851 :             if (key->triConsistentFn(key) == GIN_FALSE)
     568             209 :                 break;
     569                 :         }
     570                 :         /* i is now the last required entry. */
     571                 : 
     572             275 :         MemoryContextSwitchTo(so->keyCtx);
     573                 : 
     574             275 :         key->nrequired = i + 1;
     575             275 :         key->nadditional = key->nentries - key->nrequired;
     576             275 :         key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
     577             275 :         key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
     578                 : 
     579             275 :         j = 0;
     580            1192 :         for (i = 0; i < key->nrequired; i++)
     581             917 :             key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
     582             643 :         for (i = 0; i < key->nadditional; i++)
     583             368 :             key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
     584                 : 
     585                 :         /* clean up after consistentFn calls (also frees entryIndexes) */
     586             275 :         MemoryContextReset(so->tempCtx);
     587                 :     }
     588                 :     else
     589                 :     {
     590             558 :         MemoryContextSwitchTo(so->keyCtx);
     591                 : 
     592             558 :         key->nrequired = 1;
     593             558 :         key->nadditional = 0;
     594             558 :         key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
     595             558 :         key->requiredEntries[0] = key->scanEntry[0];
     596                 :     }
     597             852 :     MemoryContextSwitchTo(oldCtx);
     598             852 : }
     599                 : 
     600                 : static void
     601             788 : startScan(IndexScanDesc scan)
     602                 : {
     603             788 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
     604             788 :     GinState   *ginstate = &so->ginstate;
     605                 :     uint32      i;
     606                 : 
     607            2634 :     for (i = 0; i < so->totalentries; i++)
     608            1846 :         startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
     609                 : 
     610             788 :     if (GinFuzzySearchLimit > 0)
     611                 :     {
     612                 :         /*
     613                 :          * If all of keys more than threshold we will try to reduce result, we
     614                 :          * hope (and only hope, for intersection operation of array our
     615                 :          * supposition isn't true), that total result will not more than
     616                 :          * minimal predictNumberResult.
     617                 :          */
     618               3 :         bool        reduce = true;
     619                 : 
     620               6 :         for (i = 0; i < so->totalentries; i++)
     621                 :         {
     622               3 :             if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
     623                 :             {
     624 UBC           0 :                 reduce = false;
     625               0 :                 break;
     626                 :             }
     627                 :         }
     628 CBC           3 :         if (reduce)
     629                 :         {
     630               6 :             for (i = 0; i < so->totalentries; i++)
     631                 :             {
     632               3 :                 so->entries[i]->predictNumberResult /= so->totalentries;
     633               3 :                 so->entries[i]->reduceResult = true;
     634                 :             }
     635                 :         }
     636                 :     }
     637                 : 
     638                 :     /*
     639                 :      * Now that we have the estimates for the entry frequencies, finish
     640                 :      * initializing the scan keys.
     641                 :      */
     642            1640 :     for (i = 0; i < so->nkeys; i++)
     643             852 :         startScanKey(ginstate, so, so->keys + i);
     644             788 : }
     645                 : 
     646                 : /*
     647                 :  * Load the next batch of item pointers from a posting tree.
     648                 :  *
     649                 :  * Note that we copy the page into GinScanEntry->list array and unlock it, but
     650                 :  * keep it pinned to prevent interference with vacuum.
     651                 :  */
     652                 : static void
     653              50 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
     654                 :                    ItemPointerData advancePast, Snapshot snapshot)
     655                 : {
     656                 :     Page        page;
     657                 :     int         i;
     658                 :     bool        stepright;
     659                 : 
     660              50 :     if (!BufferIsValid(entry->buffer))
     661                 :     {
     662 UBC           0 :         entry->isFinished = true;
     663               0 :         return;
     664                 :     }
     665                 : 
     666                 :     /*
     667                 :      * We have two strategies for finding the correct page: step right from
     668                 :      * the current page, or descend the tree again from the root. If
     669                 :      * advancePast equals the current item, the next matching item should be
     670                 :      * on the next page, so we step right. Otherwise, descend from root.
     671                 :      */
     672 CBC          50 :     if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
     673                 :     {
     674              47 :         stepright = true;
     675              47 :         LockBuffer(entry->buffer, GIN_SHARE);
     676                 :     }
     677                 :     else
     678                 :     {
     679                 :         GinBtreeStack *stack;
     680                 : 
     681               3 :         ReleaseBuffer(entry->buffer);
     682                 : 
     683                 :         /*
     684                 :          * Set the search key, and find the correct leaf page.
     685                 :          */
     686               3 :         if (ItemPointerIsLossyPage(&advancePast))
     687                 :         {
     688 UBC           0 :             ItemPointerSet(&entry->btree.itemptr,
     689               0 :                            GinItemPointerGetBlockNumber(&advancePast) + 1,
     690                 :                            FirstOffsetNumber);
     691                 :         }
     692                 :         else
     693                 :         {
     694 CBC           3 :             ItemPointerSet(&entry->btree.itemptr,
     695                 :                            GinItemPointerGetBlockNumber(&advancePast),
     696               3 :                            OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
     697                 :         }
     698               3 :         entry->btree.fullScan = false;
     699               3 :         stack = ginFindLeafPage(&entry->btree, true, false, snapshot);
     700                 : 
     701                 :         /* we don't need the stack, just the buffer. */
     702               3 :         entry->buffer = stack->buffer;
     703               3 :         IncrBufferRefCount(entry->buffer);
     704               3 :         freeGinBtreeStack(stack);
     705               3 :         stepright = false;
     706                 :     }
     707                 : 
     708              50 :     elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
     709                 :          GinItemPointerGetBlockNumber(&advancePast),
     710                 :          GinItemPointerGetOffsetNumber(&advancePast),
     711                 :          !stepright);
     712                 : 
     713              50 :     page = BufferGetPage(entry->buffer);
     714                 :     for (;;)
     715                 :     {
     716              53 :         entry->offset = InvalidOffsetNumber;
     717              53 :         if (entry->list)
     718                 :         {
     719              47 :             pfree(entry->list);
     720              47 :             entry->list = NULL;
     721              47 :             entry->nlist = 0;
     722                 :         }
     723                 : 
     724              53 :         if (stepright)
     725                 :         {
     726                 :             /*
     727                 :              * We've processed all the entries on this page. If it was the
     728                 :              * last page in the tree, we're done.
     729                 :              */
     730              50 :             if (GinPageRightMost(page))
     731                 :             {
     732               3 :                 UnlockReleaseBuffer(entry->buffer);
     733               3 :                 entry->buffer = InvalidBuffer;
     734               3 :                 entry->isFinished = true;
     735               3 :                 return;
     736                 :             }
     737                 : 
     738                 :             /*
     739                 :              * Step to next page, following the right link. then find the
     740                 :              * first ItemPointer greater than advancePast.
     741                 :              */
     742              47 :             entry->buffer = ginStepRight(entry->buffer,
     743                 :                                          ginstate->index,
     744                 :                                          GIN_SHARE);
     745              47 :             page = BufferGetPage(entry->buffer);
     746                 :         }
     747              50 :         stepright = true;
     748                 : 
     749              50 :         if (GinPageGetOpaque(page)->flags & GIN_DELETED)
     750 UBC           0 :             continue;           /* page was deleted by concurrent vacuum */
     751                 : 
     752                 :         /*
     753                 :          * The first item > advancePast might not be on this page, but
     754                 :          * somewhere to the right, if the page was split, or a non-match from
     755                 :          * another key in the query allowed us to skip some items from this
     756                 :          * entry. Keep following the right-links until we re-find the correct
     757                 :          * page.
     758                 :          */
     759 CBC          68 :         if (!GinPageRightMost(page) &&
     760              18 :             ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
     761                 :         {
     762                 :             /*
     763                 :              * the item we're looking is > the right bound of the page, so it
     764                 :              * can't be on this page.
     765                 :              */
     766 UBC           0 :             continue;
     767                 :         }
     768                 : 
     769 CBC          50 :         entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
     770                 : 
     771             962 :         for (i = 0; i < entry->nlist; i++)
     772                 :         {
     773             959 :             if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
     774                 :             {
     775              47 :                 entry->offset = i;
     776                 : 
     777              47 :                 if (GinPageRightMost(page))
     778                 :                 {
     779                 :                     /* after processing the copied items, we're done. */
     780              29 :                     UnlockReleaseBuffer(entry->buffer);
     781              29 :                     entry->buffer = InvalidBuffer;
     782                 :                 }
     783                 :                 else
     784              18 :                     LockBuffer(entry->buffer, GIN_UNLOCK);
     785              47 :                 return;
     786                 :             }
     787                 :         }
     788                 :     }
     789                 : }
     790                 : 
     791                 : #define gin_rand() pg_prng_double(&pg_global_prng_state)
     792                 : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
     793                 : 
     794                 : /*
     795                 :  * Sets entry->curItem to next heap item pointer > advancePast, for one entry
     796                 :  * of one scan key, or sets entry->isFinished to true if there are no more.
     797                 :  *
     798                 :  * Item pointers are returned in ascending order.
     799                 :  *
     800                 :  * Note: this can return a "lossy page" item pointer, indicating that the
     801                 :  * entry potentially matches all items on that heap page.  However, it is
     802                 :  * not allowed to return both a lossy page pointer and exact (regular)
     803                 :  * item pointers for the same page.  (Doing so would break the key-combination
     804                 :  * logic in keyGetItem and scanGetItem; see comment in scanGetItem.)  In the
     805                 :  * current implementation this is guaranteed by the behavior of tidbitmaps.
     806                 :  */
     807                 : static void
     808          541698 : entryGetItem(GinState *ginstate, GinScanEntry entry,
     809                 :              ItemPointerData advancePast, Snapshot snapshot)
     810                 : {
     811          541698 :     Assert(!entry->isFinished);
     812                 : 
     813          541698 :     Assert(!ItemPointerIsValid(&entry->curItem) ||
     814                 :            ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
     815                 : 
     816          541698 :     if (entry->matchBitmap)
     817                 :     {
     818                 :         /* A bitmap result */
     819          134020 :         BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
     820          134020 :         OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
     821                 : 
     822                 :         for (;;)
     823                 :         {
     824                 :             /*
     825                 :              * If we've exhausted all items on this block, move to next block
     826                 :              * in the bitmap.
     827                 :              */
     828          134020 :             while (entry->matchResult == NULL ||
     829          136223 :                    (entry->matchResult->ntuples >= 0 &&
     830          136223 :                     entry->offset >= entry->matchResult->ntuples) ||
     831          404053 :                    entry->matchResult->blockno < advancePastBlk ||
     832          133810 :                    (ItemPointerIsLossyPage(&advancePast) &&
     833 UBC           0 :                     entry->matchResult->blockno == advancePastBlk))
     834                 :             {
     835 CBC        2623 :                 entry->matchResult = tbm_iterate(entry->matchIterator);
     836                 : 
     837            2623 :                 if (entry->matchResult == NULL)
     838                 :                 {
     839             210 :                     ItemPointerSetInvalid(&entry->curItem);
     840             210 :                     tbm_end_iterate(entry->matchIterator);
     841             210 :                     entry->matchIterator = NULL;
     842             210 :                     entry->isFinished = true;
     843             210 :                     break;
     844                 :                 }
     845                 : 
     846                 :                 /*
     847                 :                  * Reset counter to the beginning of entry->matchResult. Note:
     848                 :                  * entry->offset is still greater than matchResult->ntuples if
     849                 :                  * matchResult is lossy.  So, on next call we will get next
     850                 :                  * result from TIDBitmap.
     851                 :                  */
     852            2413 :                 entry->offset = 0;
     853                 :             }
     854          134020 :             if (entry->isFinished)
     855             210 :                 break;
     856                 : 
     857                 :             /*
     858                 :              * We're now on the first page after advancePast which has any
     859                 :              * items on it. If it's a lossy result, return that.
     860                 :              */
     861          133810 :             if (entry->matchResult->ntuples < 0)
     862                 :             {
     863 UBC           0 :                 ItemPointerSetLossyPage(&entry->curItem,
     864                 :                                         entry->matchResult->blockno);
     865                 : 
     866                 :                 /*
     867                 :                  * We might as well fall out of the loop; we could not
     868                 :                  * estimate number of results on this page to support correct
     869                 :                  * reducing of result even if it's enabled.
     870                 :                  */
     871               0 :                 break;
     872                 :             }
     873                 : 
     874                 :             /*
     875                 :              * Not a lossy page. Skip over any offsets <= advancePast, and
     876                 :              * return that.
     877                 :              */
     878 CBC      133810 :             if (entry->matchResult->blockno == advancePastBlk)
     879                 :             {
     880                 :                 /*
     881                 :                  * First, do a quick check against the last offset on the
     882                 :                  * page. If that's > advancePast, so are all the other
     883                 :                  * offsets, so just go back to the top to get the next page.
     884                 :                  */
     885          131607 :                 if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
     886                 :                 {
     887 UBC           0 :                     entry->offset = entry->matchResult->ntuples;
     888               0 :                     continue;
     889                 :                 }
     890                 : 
     891                 :                 /* Otherwise scan to find the first item > advancePast */
     892 CBC      131607 :                 while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
     893 UBC           0 :                     entry->offset++;
     894                 :             }
     895                 : 
     896 CBC      133810 :             ItemPointerSet(&entry->curItem,
     897          133810 :                            entry->matchResult->blockno,
     898          133810 :                            entry->matchResult->offsets[entry->offset]);
     899          133810 :             entry->offset++;
     900                 : 
     901                 :             /* Done unless we need to reduce the result */
     902          133810 :             if (!entry->reduceResult || !dropItem(entry))
     903                 :                 break;
     904                 :         }
     905                 :     }
     906          407678 :     else if (!BufferIsValid(entry->buffer))
     907                 :     {
     908                 :         /*
     909                 :          * A posting list from an entry tuple, or the last page of a posting
     910                 :          * tree.
     911                 :          */
     912                 :         for (;;)
     913                 :         {
     914          194714 :             if (entry->offset >= entry->nlist)
     915                 :             {
     916             810 :                 ItemPointerSetInvalid(&entry->curItem);
     917             810 :                 entry->isFinished = true;
     918             810 :                 break;
     919                 :             }
     920                 : 
     921          193904 :             entry->curItem = entry->list[entry->offset++];
     922                 : 
     923                 :             /* If we're not past advancePast, keep scanning */
     924          193904 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     925           33317 :                 continue;
     926                 : 
     927                 :             /* Done unless we need to reduce the result */
     928          160587 :             if (!entry->reduceResult || !dropItem(entry))
     929                 :                 break;
     930                 :         }
     931                 :     }
     932                 :     else
     933                 :     {
     934                 :         /* A posting tree */
     935                 :         for (;;)
     936                 :         {
     937                 :             /* If we've processed the current batch, load more items */
     938          339559 :             while (entry->offset >= entry->nlist)
     939                 :             {
     940              50 :                 entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
     941                 : 
     942              50 :                 if (entry->isFinished)
     943                 :                 {
     944               3 :                     ItemPointerSetInvalid(&entry->curItem);
     945               3 :                     return;
     946                 :                 }
     947                 :             }
     948                 : 
     949          339509 :             entry->curItem = entry->list[entry->offset++];
     950                 : 
     951                 :             /* If we're not past advancePast, keep scanning */
     952          339509 :             if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
     953           23553 :                 continue;
     954                 : 
     955                 :             /* Done unless we need to reduce the result */
     956          315956 :             if (!entry->reduceResult || !dropItem(entry))
     957                 :                 break;
     958                 : 
     959                 :             /*
     960                 :              * Advance advancePast (so that entryLoadMoreItems will load the
     961                 :              * right data), and keep scanning
     962                 :              */
     963           59484 :             advancePast = entry->curItem;
     964                 :         }
     965                 :     }
     966                 : }
     967                 : 
     968                 : /*
     969                 :  * Identify the "current" item among the input entry streams for this scan key
     970                 :  * that is greater than advancePast, and test whether it passes the scan key
     971                 :  * qual condition.
     972                 :  *
     973                 :  * The current item is the smallest curItem among the inputs.  key->curItem
     974                 :  * is set to that value.  key->curItemMatches is set to indicate whether that
     975                 :  * TID passes the consistentFn test.  If so, key->recheckCurItem is set true
     976                 :  * iff recheck is needed for this item pointer (including the case where the
     977                 :  * item pointer is a lossy page pointer).
     978                 :  *
     979                 :  * If all entry streams are exhausted, sets key->isFinished to true.
     980                 :  *
     981                 :  * Item pointers must be returned in ascending order.
     982                 :  *
     983                 :  * Note: this can return a "lossy page" item pointer, indicating that the
     984                 :  * key potentially matches all items on that heap page.  However, it is
     985                 :  * not allowed to return both a lossy page pointer and exact (regular)
     986                 :  * item pointers for the same page.  (Doing so would break the key-combination
     987                 :  * logic in scanGetItem.)
     988                 :  */
     989                 : static void
     990          501146 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
     991                 :            ItemPointerData advancePast, Snapshot snapshot)
     992                 : {
     993                 :     ItemPointerData minItem;
     994                 :     ItemPointerData curPageLossy;
     995                 :     uint32      i;
     996                 :     bool        haveLossyEntry;
     997                 :     GinScanEntry entry;
     998                 :     GinTernaryValue res;
     999                 :     MemoryContext oldCtx;
    1000                 :     bool        allFinished;
    1001                 : 
    1002          501146 :     Assert(!key->isFinished);
    1003                 : 
    1004                 :     /*
    1005                 :      * We might have already tested this item; if so, no need to repeat work.
    1006                 :      * (Note: the ">" case can happen, if advancePast is exact but we
    1007                 :      * previously had to set curItem to a lossy-page pointer.)
    1008                 :      */
    1009          501146 :     if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
    1010             799 :         return;
    1011                 : 
    1012                 :     /*
    1013                 :      * Find the minimum item > advancePast among the active entry streams.
    1014                 :      *
    1015                 :      * Note: a lossy-page entry is encoded by a ItemPointer with max value for
    1016                 :      * offset (0xffff), so that it will sort after any exact entries for the
    1017                 :      * same page.  So we'll prefer to return exact pointers not lossy
    1018                 :      * pointers, which is good.
    1019                 :      */
    1020          501135 :     ItemPointerSetMax(&minItem);
    1021          501135 :     allFinished = true;
    1022         1357742 :     for (i = 0; i < key->nrequired; i++)
    1023                 :     {
    1024          856607 :         entry = key->requiredEntries[i];
    1025                 : 
    1026          856607 :         if (entry->isFinished)
    1027          262844 :             continue;
    1028                 : 
    1029                 :         /*
    1030                 :          * Advance this stream if necessary.
    1031                 :          *
    1032                 :          * In particular, since entry->curItem was initialized with
    1033                 :          * ItemPointerSetMin, this ensures we fetch the first item for each
    1034                 :          * entry on the first call.
    1035                 :          */
    1036          593763 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1037                 :         {
    1038          514820 :             entryGetItem(ginstate, entry, advancePast, snapshot);
    1039          514820 :             if (entry->isFinished)
    1040             934 :                 continue;
    1041                 :         }
    1042                 : 
    1043          592829 :         allFinished = false;
    1044          592829 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1045          548728 :             minItem = entry->curItem;
    1046                 :     }
    1047                 : 
    1048          501135 :     if (allFinished && !key->excludeOnly)
    1049                 :     {
    1050                 :         /* all entries are finished */
    1051             788 :         key->isFinished = true;
    1052             788 :         return;
    1053                 :     }
    1054                 : 
    1055          500347 :     if (!key->excludeOnly)
    1056                 :     {
    1057                 :         /*
    1058                 :          * For a normal scan key, we now know there are no matches < minItem.
    1059                 :          *
    1060                 :          * If minItem is lossy, it means that there were no exact items on the
    1061                 :          * page among requiredEntries, because lossy pointers sort after exact
    1062                 :          * items. However, there might be exact items for the same page among
    1063                 :          * additionalEntries, so we mustn't advance past them.
    1064                 :          */
    1065          498109 :         if (ItemPointerIsLossyPage(&minItem))
    1066                 :         {
    1067 UBC           0 :             if (GinItemPointerGetBlockNumber(&advancePast) <
    1068               0 :                 GinItemPointerGetBlockNumber(&minItem))
    1069                 :             {
    1070               0 :                 ItemPointerSet(&advancePast,
    1071                 :                                GinItemPointerGetBlockNumber(&minItem),
    1072                 :                                InvalidOffsetNumber);
    1073                 :             }
    1074                 :         }
    1075                 :         else
    1076                 :         {
    1077 CBC      498109 :             Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
    1078          498109 :             ItemPointerSet(&advancePast,
    1079                 :                            GinItemPointerGetBlockNumber(&minItem),
    1080          498109 :                            OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
    1081                 :         }
    1082                 :     }
    1083                 :     else
    1084                 :     {
    1085                 :         /*
    1086                 :          * excludeOnly scan keys don't have any entries that are necessarily
    1087                 :          * present in matching items.  So, we consider the item just after
    1088                 :          * advancePast.
    1089                 :          */
    1090            2238 :         Assert(key->nrequired == 0);
    1091            2238 :         ItemPointerSet(&minItem,
    1092                 :                        GinItemPointerGetBlockNumber(&advancePast),
    1093            2238 :                        OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
    1094                 :     }
    1095                 : 
    1096                 :     /*
    1097                 :      * We might not have loaded all the entry streams for this TID yet. We
    1098                 :      * could call the consistent function, passing MAYBE for those entries, to
    1099                 :      * see if it can decide if this TID matches based on the information we
    1100                 :      * have. But if the consistent-function is expensive, and cannot in fact
    1101                 :      * decide with partial information, that could be a big loss. So, load all
    1102                 :      * the additional entries, before calling the consistent function.
    1103                 :      */
    1104          533677 :     for (i = 0; i < key->nadditional; i++)
    1105                 :     {
    1106           33330 :         entry = key->additionalEntries[i];
    1107                 : 
    1108           33330 :         if (entry->isFinished)
    1109             206 :             continue;
    1110                 : 
    1111           33124 :         if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1112                 :         {
    1113           26878 :             entryGetItem(ginstate, entry, advancePast, snapshot);
    1114           26878 :             if (entry->isFinished)
    1115              89 :                 continue;
    1116                 :         }
    1117                 : 
    1118                 :         /*
    1119                 :          * Normally, none of the items in additionalEntries can have a curItem
    1120                 :          * larger than minItem. But if minItem is a lossy page, then there
    1121                 :          * might be exact items on the same page among additionalEntries.
    1122                 :          */
    1123           33035 :         if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
    1124                 :         {
    1125 UBC           0 :             Assert(ItemPointerIsLossyPage(&minItem));
    1126               0 :             minItem = entry->curItem;
    1127                 :         }
    1128                 :     }
    1129                 : 
    1130                 :     /*
    1131                 :      * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
    1132                 :      * and perform consistentFn test.
    1133                 :      *
    1134                 :      * Lossy-page entries pose a problem, since we don't know the correct
    1135                 :      * entryRes state to pass to the consistentFn, and we also don't know what
    1136                 :      * its combining logic will be (could be AND, OR, or even NOT). If the
    1137                 :      * logic is OR then the consistentFn might succeed for all items in the
    1138                 :      * lossy page even when none of the other entries match.
    1139                 :      *
    1140                 :      * Our strategy is to call the tri-state consistent function, with the
    1141                 :      * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
    1142                 :      * returns FALSE, none of the lossy items alone are enough for a match, so
    1143                 :      * we don't need to return a lossy-page pointer. Otherwise, return a
    1144                 :      * lossy-page pointer to indicate that the whole heap page must be
    1145                 :      * checked.  (On subsequent calls, we'll do nothing until minItem is past
    1146                 :      * the page altogether, thus ensuring that we never return both regular
    1147                 :      * and lossy pointers for the same page.)
    1148                 :      *
    1149                 :      * An exception is that it doesn't matter what we pass for lossy pointers
    1150                 :      * in "hidden" entries, because the consistentFn's result can't depend on
    1151                 :      * them. We could pass them as MAYBE as well, but if we're using the
    1152                 :      * "shim" implementation of a tri-state consistent function (see
    1153                 :      * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
    1154                 :      * them as true.
    1155                 :      *
    1156                 :      * Note that only lossy-page entries pointing to the current item's page
    1157                 :      * should trigger this processing; we might have future lossy pages in the
    1158                 :      * entry array, but they aren't relevant yet.
    1159                 :      */
    1160 CBC      500347 :     key->curItem = minItem;
    1161          500347 :     ItemPointerSetLossyPage(&curPageLossy,
    1162                 :                             GinItemPointerGetBlockNumber(&key->curItem));
    1163          500347 :     haveLossyEntry = false;
    1164         1388854 :     for (i = 0; i < key->nentries; i++)
    1165                 :     {
    1166          888507 :         entry = key->scanEntry[i];
    1167         1514371 :         if (entry->isFinished == false &&
    1168          625864 :             ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1169                 :         {
    1170 UBC           0 :             if (i < key->nuserentries)
    1171               0 :                 key->entryRes[i] = GIN_MAYBE;
    1172                 :             else
    1173               0 :                 key->entryRes[i] = GIN_TRUE;
    1174               0 :             haveLossyEntry = true;
    1175                 :         }
    1176                 :         else
    1177 CBC      888507 :             key->entryRes[i] = GIN_FALSE;
    1178                 :     }
    1179                 : 
    1180                 :     /* prepare for calling consistentFn in temp context */
    1181          500347 :     oldCtx = MemoryContextSwitchTo(tempCtx);
    1182                 : 
    1183          500347 :     if (haveLossyEntry)
    1184                 :     {
    1185                 :         /* Have lossy-page entries, so see if whole page matches */
    1186 UBC           0 :         res = key->triConsistentFn(key);
    1187                 : 
    1188               0 :         if (res == GIN_TRUE || res == GIN_MAYBE)
    1189                 :         {
    1190                 :             /* Yes, so clean up ... */
    1191               0 :             MemoryContextSwitchTo(oldCtx);
    1192               0 :             MemoryContextReset(tempCtx);
    1193                 : 
    1194                 :             /* and return lossy pointer for whole page */
    1195               0 :             key->curItem = curPageLossy;
    1196               0 :             key->curItemMatches = true;
    1197               0 :             key->recheckCurItem = true;
    1198               0 :             return;
    1199                 :         }
    1200                 :     }
    1201                 : 
    1202                 :     /*
    1203                 :      * At this point we know that we don't need to return a lossy whole-page
    1204                 :      * pointer, but we might have matches for individual exact item pointers,
    1205                 :      * possibly in combination with a lossy pointer. Pass lossy pointers as
    1206                 :      * MAYBE to the ternary consistent function, to let it decide if this
    1207                 :      * tuple satisfies the overall key, even though we don't know if the lossy
    1208                 :      * entries match.
    1209                 :      *
    1210                 :      * Prepare entryRes array to be passed to consistentFn.
    1211                 :      */
    1212 CBC     1388854 :     for (i = 0; i < key->nentries; i++)
    1213                 :     {
    1214          888507 :         entry = key->scanEntry[i];
    1215          888507 :         if (entry->isFinished)
    1216          262643 :             key->entryRes[i] = GIN_FALSE;
    1217                 : #if 0
    1218                 : 
    1219                 :         /*
    1220                 :          * This case can't currently happen, because we loaded all the entries
    1221                 :          * for this item earlier.
    1222                 :          */
    1223                 :         else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
    1224                 :             key->entryRes[i] = GIN_MAYBE;
    1225                 : #endif
    1226          625864 :         else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
    1227 UBC           0 :             key->entryRes[i] = GIN_MAYBE;
    1228 CBC      625864 :         else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
    1229          535537 :             key->entryRes[i] = GIN_TRUE;
    1230                 :         else
    1231           90327 :             key->entryRes[i] = GIN_FALSE;
    1232                 :     }
    1233                 : 
    1234          500347 :     res = key->triConsistentFn(key);
    1235                 : 
    1236          500347 :     switch (res)
    1237                 :     {
    1238          432523 :         case GIN_TRUE:
    1239          432523 :             key->curItemMatches = true;
    1240                 :             /* triConsistentFn set recheckCurItem */
    1241          432523 :             break;
    1242                 : 
    1243            9013 :         case GIN_FALSE:
    1244            9013 :             key->curItemMatches = false;
    1245            9013 :             break;
    1246                 : 
    1247           58811 :         case GIN_MAYBE:
    1248           58811 :             key->curItemMatches = true;
    1249           58811 :             key->recheckCurItem = true;
    1250           58811 :             break;
    1251                 : 
    1252 UBC           0 :         default:
    1253                 : 
    1254                 :             /*
    1255                 :              * the 'default' case shouldn't happen, but if the consistent
    1256                 :              * function returns something bogus, this is the safe result
    1257                 :              */
    1258               0 :             key->curItemMatches = true;
    1259               0 :             key->recheckCurItem = true;
    1260               0 :             break;
    1261                 :     }
    1262                 : 
    1263                 :     /*
    1264                 :      * We have a tuple, and we know if it matches or not. If it's a non-match,
    1265                 :      * we could continue to find the next matching tuple, but let's break out
    1266                 :      * and give scanGetItem a chance to advance the other keys. They might be
    1267                 :      * able to skip past to a much higher TID, allowing us to save work.
    1268                 :      */
    1269                 : 
    1270                 :     /* clean up after consistentFn calls */
    1271 CBC      500347 :     MemoryContextSwitchTo(oldCtx);
    1272          500347 :     MemoryContextReset(tempCtx);
    1273                 : }
    1274                 : 
    1275                 : /*
    1276                 :  * Get next heap item pointer (after advancePast) from scan.
    1277                 :  * Returns true if anything found.
    1278                 :  * On success, *item and *recheck are set.
    1279                 :  *
    1280                 :  * Note: this is very nearly the same logic as in keyGetItem(), except
    1281                 :  * that we know the keys are to be combined with AND logic, whereas in
    1282                 :  * keyGetItem() the combination logic is known only to the consistentFn.
    1283                 :  */
    1284                 : static bool
    1285          489836 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
    1286                 :             ItemPointerData *item, bool *recheck)
    1287                 : {
    1288          489836 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1289                 :     uint32      i;
    1290                 :     bool        match;
    1291                 : 
    1292                 :     /*----------
    1293                 :      * Advance the scan keys in lock-step, until we find an item that matches
    1294                 :      * all the keys. If any key reports isFinished, meaning its subset of the
    1295                 :      * entries is exhausted, we can stop.  Otherwise, set *item to the next
    1296                 :      * matching item.
    1297                 :      *
    1298                 :      * This logic works only if a keyGetItem stream can never contain both
    1299                 :      * exact and lossy pointers for the same page.  Else we could have a
    1300                 :      * case like
    1301                 :      *
    1302                 :      *      stream 1        stream 2
    1303                 :      *      ...             ...
    1304                 :      *      42/6            42/7
    1305                 :      *      50/1            42/0xffff
    1306                 :      *      ...             ...
    1307                 :      *
    1308                 :      * We would conclude that 42/6 is not a match and advance stream 1,
    1309                 :      * thus never detecting the match to the lossy pointer in stream 2.
    1310                 :      * (keyGetItem has a similar problem versus entryGetItem.)
    1311                 :      *----------
    1312                 :      */
    1313                 :     do
    1314                 :     {
    1315          498878 :         ItemPointerSetMin(item);
    1316          498878 :         match = true;
    1317          990223 :         for (i = 0; i < so->nkeys && match; i++)
    1318                 :         {
    1319          501146 :             GinScanKey  key = so->keys + i;
    1320                 : 
    1321                 :             /*
    1322                 :              * If we're considering a lossy page, skip excludeOnly keys. They
    1323                 :              * can't exclude the whole page anyway.
    1324                 :              */
    1325          501146 :             if (ItemPointerIsLossyPage(item) && key->excludeOnly)
    1326                 :             {
    1327                 :                 /*
    1328                 :                  * ginNewScanKey() should never mark the first key as
    1329                 :                  * excludeOnly.
    1330                 :                  */
    1331 UBC           0 :                 Assert(i > 0);
    1332               0 :                 continue;
    1333                 :             }
    1334                 : 
    1335                 :             /* Fetch the next item for this key that is > advancePast. */
    1336 CBC      501146 :             keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
    1337                 :                        scan->xs_snapshot);
    1338                 : 
    1339          501146 :             if (key->isFinished)
    1340             788 :                 return false;
    1341                 : 
    1342                 :             /*
    1343                 :              * If it's not a match, we can immediately conclude that nothing
    1344                 :              * <= this item matches, without checking the rest of the keys.
    1345                 :              */
    1346          500358 :             if (!key->curItemMatches)
    1347                 :             {
    1348            9013 :                 advancePast = key->curItem;
    1349            9013 :                 match = false;
    1350            9013 :                 break;
    1351                 :             }
    1352                 : 
    1353                 :             /*
    1354                 :              * It's a match. We can conclude that nothing < matches, so the
    1355                 :              * other key streams can skip to this item.
    1356                 :              *
    1357                 :              * Beware of lossy pointers, though; from a lossy pointer, we can
    1358                 :              * only conclude that nothing smaller than this *block* matches.
    1359                 :              */
    1360          491345 :             if (ItemPointerIsLossyPage(&key->curItem))
    1361                 :             {
    1362 UBC           0 :                 if (GinItemPointerGetBlockNumber(&advancePast) <
    1363               0 :                     GinItemPointerGetBlockNumber(&key->curItem))
    1364                 :                 {
    1365               0 :                     ItemPointerSet(&advancePast,
    1366               0 :                                    GinItemPointerGetBlockNumber(&key->curItem),
    1367                 :                                    InvalidOffsetNumber);
    1368                 :                 }
    1369                 :             }
    1370                 :             else
    1371                 :             {
    1372 CBC      491345 :                 Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
    1373          491345 :                 ItemPointerSet(&advancePast,
    1374          491345 :                                GinItemPointerGetBlockNumber(&key->curItem),
    1375          491345 :                                OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
    1376                 :             }
    1377                 : 
    1378                 :             /*
    1379                 :              * If this is the first key, remember this location as a potential
    1380                 :              * match, and proceed to check the rest of the keys.
    1381                 :              *
    1382                 :              * Otherwise, check if this is the same item that we checked the
    1383                 :              * previous keys for (or a lossy pointer for the same page). If
    1384                 :              * not, loop back to check the previous keys for this item (we
    1385                 :              * will check this key again too, but keyGetItem returns quickly
    1386                 :              * for that)
    1387                 :              */
    1388          491345 :             if (i == 0)
    1389                 :             {
    1390          489130 :                 *item = key->curItem;
    1391                 :             }
    1392                 :             else
    1393                 :             {
    1394            4430 :                 if (ItemPointerIsLossyPage(&key->curItem) ||
    1395            2215 :                     ItemPointerIsLossyPage(item))
    1396                 :                 {
    1397 UBC           0 :                     Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
    1398               0 :                     match = (GinItemPointerGetBlockNumber(&key->curItem) ==
    1399               0 :                              GinItemPointerGetBlockNumber(item));
    1400                 :                 }
    1401                 :                 else
    1402                 :                 {
    1403 CBC        2215 :                     Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
    1404            2215 :                     match = (ginCompareItemPointers(&key->curItem, item) == 0);
    1405                 :                 }
    1406                 :             }
    1407                 :         }
    1408          498090 :     } while (!match);
    1409                 : 
    1410          489048 :     Assert(!ItemPointerIsMin(item));
    1411                 : 
    1412                 :     /*
    1413                 :      * Now *item contains the first ItemPointer after previous result that
    1414                 :      * satisfied all the keys for that exact TID, or a lossy reference to the
    1415                 :      * same page.
    1416                 :      *
    1417                 :      * We must return recheck = true if any of the keys are marked recheck.
    1418                 :      */
    1419          489048 :     *recheck = false;
    1420          912948 :     for (i = 0; i < so->nkeys; i++)
    1421                 :     {
    1422          489234 :         GinScanKey  key = so->keys + i;
    1423                 : 
    1424          489234 :         if (key->recheckCurItem)
    1425                 :         {
    1426           65334 :             *recheck = true;
    1427           65334 :             break;
    1428                 :         }
    1429                 :     }
    1430                 : 
    1431          489048 :     return true;
    1432                 : }
    1433                 : 
    1434                 : 
    1435                 : /*
    1436                 :  * Functions for scanning the pending list
    1437                 :  */
    1438                 : 
    1439                 : 
    1440                 : /*
    1441                 :  * Get ItemPointer of next heap row to be checked from pending list.
    1442                 :  * Returns false if there are no more. On pages with several heap rows
    1443                 :  * it returns each row separately, on page with part of heap row returns
    1444                 :  * per page data.  pos->firstOffset and pos->lastOffset are set to identify
    1445                 :  * the range of pending-list tuples belonging to this heap row.
    1446                 :  *
    1447                 :  * The pendingBuffer is presumed pinned and share-locked on entry, and is
    1448                 :  * pinned and share-locked on success exit.  On failure exit it's released.
    1449                 :  */
    1450                 : static bool
    1451             807 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
    1452                 : {
    1453                 :     OffsetNumber maxoff;
    1454                 :     Page        page;
    1455                 :     IndexTuple  itup;
    1456                 : 
    1457             807 :     ItemPointerSetInvalid(&pos->item);
    1458                 :     for (;;)
    1459                 :     {
    1460             807 :         page = BufferGetPage(pos->pendingBuffer);
    1461             807 :         TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1462                 : 
    1463             807 :         maxoff = PageGetMaxOffsetNumber(page);
    1464             807 :         if (pos->firstOffset > maxoff)
    1465                 :         {
    1466              83 :             BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
    1467                 : 
    1468              83 :             if (blkno == InvalidBlockNumber)
    1469                 :             {
    1470              83 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1471              83 :                 pos->pendingBuffer = InvalidBuffer;
    1472                 : 
    1473              83 :                 return false;
    1474                 :             }
    1475                 :             else
    1476                 :             {
    1477                 :                 /*
    1478                 :                  * Here we must prevent deletion of next page by insertcleanup
    1479                 :                  * process, which may be trying to obtain exclusive lock on
    1480                 :                  * current page.  So, we lock next page before releasing the
    1481                 :                  * current one
    1482                 :                  */
    1483 UBC           0 :                 Buffer      tmpbuf = ReadBuffer(scan->indexRelation, blkno);
    1484                 : 
    1485               0 :                 LockBuffer(tmpbuf, GIN_SHARE);
    1486               0 :                 UnlockReleaseBuffer(pos->pendingBuffer);
    1487                 : 
    1488               0 :                 pos->pendingBuffer = tmpbuf;
    1489               0 :                 pos->firstOffset = FirstOffsetNumber;
    1490                 :             }
    1491                 :         }
    1492                 :         else
    1493                 :         {
    1494 CBC         724 :             itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
    1495             724 :             pos->item = itup->t_tid;
    1496             724 :             if (GinPageHasFullRow(page))
    1497                 :             {
    1498                 :                 /*
    1499                 :                  * find itempointer to the next row
    1500                 :                  */
    1501            1677 :                 for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
    1502                 :                 {
    1503            1594 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
    1504            1594 :                     if (!ItemPointerEquals(&pos->item, &itup->t_tid))
    1505             641 :                         break;
    1506                 :                 }
    1507                 :             }
    1508                 :             else
    1509                 :             {
    1510                 :                 /*
    1511                 :                  * All itempointers are the same on this page
    1512                 :                  */
    1513 UBC           0 :                 pos->lastOffset = maxoff + 1;
    1514                 :             }
    1515                 : 
    1516                 :             /*
    1517                 :              * Now pos->firstOffset points to the first tuple of current heap
    1518                 :              * row, pos->lastOffset points to the first tuple of next heap row
    1519                 :              * (or to the end of page)
    1520                 :              */
    1521 CBC         724 :             break;
    1522                 :         }
    1523                 :     }
    1524                 : 
    1525             724 :     return true;
    1526                 : }
    1527                 : 
    1528                 : /*
    1529                 :  * Scan pending-list page from current tuple (off) up till the first of:
    1530                 :  * - match is found (then returns true)
    1531                 :  * - no later match is possible
    1532                 :  * - tuple's attribute number is not equal to entry's attrnum
    1533                 :  * - reach end of page
    1534                 :  *
    1535                 :  * datum[]/category[]/datumExtracted[] arrays are used to cache the results
    1536                 :  * of gintuple_get_key() on the current page.
    1537                 :  */
    1538                 : static bool
    1539 UBC           0 : matchPartialInPendingList(GinState *ginstate, Page page,
    1540                 :                           OffsetNumber off, OffsetNumber maxoff,
    1541                 :                           GinScanEntry entry,
    1542                 :                           Datum *datum, GinNullCategory *category,
    1543                 :                           bool *datumExtracted)
    1544                 : {
    1545                 :     IndexTuple  itup;
    1546                 :     int32       cmp;
    1547                 : 
    1548                 :     /* Partial match to a null is not possible */
    1549               0 :     if (entry->queryCategory != GIN_CAT_NORM_KEY)
    1550               0 :         return false;
    1551                 : 
    1552               0 :     while (off < maxoff)
    1553                 :     {
    1554               0 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
    1555                 : 
    1556               0 :         if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
    1557               0 :             return false;
    1558                 : 
    1559               0 :         if (datumExtracted[off - 1] == false)
    1560                 :         {
    1561               0 :             datum[off - 1] = gintuple_get_key(ginstate, itup,
    1562               0 :                                               &category[off - 1]);
    1563               0 :             datumExtracted[off - 1] = true;
    1564                 :         }
    1565                 : 
    1566                 :         /* Once we hit nulls, no further match is possible */
    1567               0 :         if (category[off - 1] != GIN_CAT_NORM_KEY)
    1568               0 :             return false;
    1569                 : 
    1570                 :         /*----------
    1571                 :          * Check partial match.
    1572                 :          * case cmp == 0 => match
    1573                 :          * case cmp > 0 => not match and end scan (no later match possible)
    1574                 :          * case cmp < 0 => not match and continue scan
    1575                 :          *----------
    1576                 :          */
    1577               0 :         cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
    1578               0 :                                               ginstate->supportCollation[entry->attnum - 1],
    1579                 :                                               entry->queryKey,
    1580               0 :                                               datum[off - 1],
    1581               0 :                                               UInt16GetDatum(entry->strategy),
    1582               0 :                                               PointerGetDatum(entry->extra_data)));
    1583               0 :         if (cmp == 0)
    1584               0 :             return true;
    1585               0 :         else if (cmp > 0)
    1586               0 :             return false;
    1587                 : 
    1588               0 :         off++;
    1589                 :     }
    1590                 : 
    1591               0 :     return false;
    1592                 : }
    1593                 : 
    1594                 : /*
    1595                 :  * Set up the entryRes array for each key by looking at
    1596                 :  * every entry for current heap row in pending list.
    1597                 :  *
    1598                 :  * Returns true if each scan key has at least one entryRes match.
    1599                 :  * This corresponds to the situations where the normal index search will
    1600                 :  * try to apply the key's consistentFn.  (A tuple not meeting that requirement
    1601                 :  * cannot be returned by the normal search since no entry stream will
    1602                 :  * source its TID.)
    1603                 :  *
    1604                 :  * The pendingBuffer is presumed pinned and share-locked on entry.
    1605                 :  */
    1606                 : static bool
    1607 CBC         724 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
    1608                 : {
    1609             724 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1610                 :     OffsetNumber attrnum;
    1611                 :     Page        page;
    1612                 :     IndexTuple  itup;
    1613                 :     int         i,
    1614                 :                 j;
    1615                 : 
    1616                 :     /*
    1617                 :      * Reset all entryRes and hasMatchKey flags
    1618                 :      */
    1619            1962 :     for (i = 0; i < so->nkeys; i++)
    1620                 :     {
    1621            1238 :         GinScanKey  key = so->keys + i;
    1622                 : 
    1623            1238 :         memset(key->entryRes, GIN_FALSE, key->nentries);
    1624                 :     }
    1625             724 :     memset(pos->hasMatchKey, false, so->nkeys);
    1626                 : 
    1627                 :     /*
    1628                 :      * Outer loop iterates over multiple pending-list pages when a single heap
    1629                 :      * row has entries spanning those pages.
    1630                 :      */
    1631                 :     for (;;)
    1632 UBC           0 :     {
    1633                 :         Datum       datum[BLCKSZ / sizeof(IndexTupleData)];
    1634                 :         GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
    1635                 :         bool        datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
    1636                 : 
    1637 CBC         724 :         Assert(pos->lastOffset > pos->firstOffset);
    1638             724 :         memset(datumExtracted + pos->firstOffset - 1, 0,
    1639             724 :                sizeof(bool) * (pos->lastOffset - pos->firstOffset));
    1640                 : 
    1641             724 :         page = BufferGetPage(pos->pendingBuffer);
    1642             724 :         TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1643                 : 
    1644            1962 :         for (i = 0; i < so->nkeys; i++)
    1645                 :         {
    1646            1238 :             GinScanKey  key = so->keys + i;
    1647                 : 
    1648            2388 :             for (j = 0; j < key->nentries; j++)
    1649                 :             {
    1650            1150 :                 GinScanEntry entry = key->scanEntry[j];
    1651            1150 :                 OffsetNumber StopLow = pos->firstOffset,
    1652            1150 :                             StopHigh = pos->lastOffset,
    1653                 :                             StopMiddle;
    1654                 : 
    1655                 :                 /* If already matched on earlier page, do no extra work */
    1656            1150 :                 if (key->entryRes[j])
    1657 UBC           0 :                     continue;
    1658                 : 
    1659                 :                 /*
    1660                 :                  * Interesting tuples are from pos->firstOffset to
    1661                 :                  * pos->lastOffset and they are ordered by (attnum, Datum) as
    1662                 :                  * it's done in entry tree.  So we can use binary search to
    1663                 :                  * avoid linear scanning.
    1664                 :                  */
    1665 CBC        2554 :                 while (StopLow < StopHigh)
    1666                 :                 {
    1667                 :                     int         res;
    1668                 : 
    1669            2005 :                     StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
    1670                 : 
    1671            2005 :                     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
    1672                 : 
    1673            2005 :                     attrnum = gintuple_get_attrnum(&so->ginstate, itup);
    1674                 : 
    1675            2005 :                     if (key->attnum < attrnum)
    1676                 :                     {
    1677             399 :                         StopHigh = StopMiddle;
    1678             399 :                         continue;
    1679                 :                     }
    1680            1606 :                     if (key->attnum > attrnum)
    1681                 :                     {
    1682             330 :                         StopLow = StopMiddle + 1;
    1683             330 :                         continue;
    1684                 :                     }
    1685                 : 
    1686            1276 :                     if (datumExtracted[StopMiddle - 1] == false)
    1687                 :                     {
    1688            2476 :                         datum[StopMiddle - 1] =
    1689            1238 :                             gintuple_get_key(&so->ginstate, itup,
    1690            1238 :                                              &category[StopMiddle - 1]);
    1691            1238 :                         datumExtracted[StopMiddle - 1] = true;
    1692                 :                     }
    1693                 : 
    1694            1276 :                     if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
    1695                 :                     {
    1696                 :                         /* special behavior depending on searchMode */
    1697             542 :                         if (entry->searchMode == GIN_SEARCH_MODE_ALL)
    1698                 :                         {
    1699                 :                             /* match anything except NULL_ITEM */
    1700             542 :                             if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
    1701             189 :                                 res = -1;
    1702                 :                             else
    1703             353 :                                 res = 0;
    1704                 :                         }
    1705                 :                         else
    1706                 :                         {
    1707                 :                             /* match everything */
    1708 UBC           0 :                             res = 0;
    1709                 :                         }
    1710                 :                     }
    1711                 :                     else
    1712                 :                     {
    1713 CBC         734 :                         res = ginCompareEntries(&so->ginstate,
    1714             734 :                                                 entry->attnum,
    1715                 :                                                 entry->queryKey,
    1716             734 :                                                 entry->queryCategory,
    1717             734 :                                                 datum[StopMiddle - 1],
    1718             734 :                                                 category[StopMiddle - 1]);
    1719                 :                     }
    1720                 : 
    1721            1276 :                     if (res == 0)
    1722                 :                     {
    1723                 :                         /*
    1724                 :                          * Found exact match (there can be only one, except in
    1725                 :                          * EMPTY_QUERY mode).
    1726                 :                          *
    1727                 :                          * If doing partial match, scan forward from here to
    1728                 :                          * end of page to check for matches.
    1729                 :                          *
    1730                 :                          * See comment above about tuple's ordering.
    1731                 :                          */
    1732             601 :                         if (entry->isPartialMatch)
    1733 UBC           0 :                             key->entryRes[j] =
    1734               0 :                                 matchPartialInPendingList(&so->ginstate,
    1735                 :                                                           page,
    1736                 :                                                           StopMiddle,
    1737               0 :                                                           pos->lastOffset,
    1738                 :                                                           entry,
    1739                 :                                                           datum,
    1740                 :                                                           category,
    1741                 :                                                           datumExtracted);
    1742                 :                         else
    1743 CBC         601 :                             key->entryRes[j] = true;
    1744                 : 
    1745                 :                         /* done with binary search */
    1746             601 :                         break;
    1747                 :                     }
    1748             675 :                     else if (res < 0)
    1749             657 :                         StopHigh = StopMiddle;
    1750                 :                     else
    1751              18 :                         StopLow = StopMiddle + 1;
    1752                 :                 }
    1753                 : 
    1754            1150 :                 if (StopLow >= StopHigh && entry->isPartialMatch)
    1755                 :                 {
    1756                 :                     /*
    1757                 :                      * No exact match on this page.  If doing partial match,
    1758                 :                      * scan from the first tuple greater than target value to
    1759                 :                      * end of page.  Note that since we don't remember whether
    1760                 :                      * the comparePartialFn told us to stop early on a
    1761                 :                      * previous page, we will uselessly apply comparePartialFn
    1762                 :                      * to the first tuple on each subsequent page.
    1763                 :                      */
    1764 UBC           0 :                     key->entryRes[j] =
    1765               0 :                         matchPartialInPendingList(&so->ginstate,
    1766                 :                                                   page,
    1767                 :                                                   StopHigh,
    1768               0 :                                                   pos->lastOffset,
    1769                 :                                                   entry,
    1770                 :                                                   datum,
    1771                 :                                                   category,
    1772                 :                                                   datumExtracted);
    1773                 :                 }
    1774                 : 
    1775 CBC        1150 :                 pos->hasMatchKey[i] |= key->entryRes[j];
    1776                 :             }
    1777                 :         }
    1778                 : 
    1779                 :         /* Advance firstOffset over the scanned tuples */
    1780             724 :         pos->firstOffset = pos->lastOffset;
    1781                 : 
    1782             724 :         if (GinPageHasFullRow(page))
    1783                 :         {
    1784                 :             /*
    1785                 :              * We have examined all pending entries for the current heap row.
    1786                 :              * Break out of loop over pages.
    1787                 :              */
    1788             724 :             break;
    1789                 :         }
    1790                 :         else
    1791                 :         {
    1792                 :             /*
    1793                 :              * Advance to next page of pending entries for the current heap
    1794                 :              * row.  Complain if there isn't one.
    1795                 :              */
    1796 UBC           0 :             ItemPointerData item = pos->item;
    1797                 : 
    1798               0 :             if (scanGetCandidate(scan, pos) == false ||
    1799               0 :                 !ItemPointerEquals(&pos->item, &item))
    1800               0 :                 elog(ERROR, "could not find additional pending pages for same heap tuple");
    1801                 :         }
    1802                 :     }
    1803                 : 
    1804                 :     /*
    1805                 :      * All scan keys except excludeOnly require at least one entry to match.
    1806                 :      * excludeOnly keys are an exception, because their implied
    1807                 :      * GIN_CAT_EMPTY_QUERY scanEntry always matches.  So return "true" if all
    1808                 :      * non-excludeOnly scan keys have at least one match.
    1809                 :      */
    1810 CBC        1269 :     for (i = 0; i < so->nkeys; i++)
    1811                 :     {
    1812             992 :         if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
    1813             447 :             return false;
    1814                 :     }
    1815                 : 
    1816             277 :     return true;
    1817                 : }
    1818                 : 
    1819                 : /*
    1820                 :  * Collect all matched rows from pending list into bitmap.
    1821                 :  */
    1822                 : static void
    1823             788 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
    1824                 : {
    1825             788 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1826                 :     MemoryContext oldCtx;
    1827                 :     bool        recheck,
    1828                 :                 match;
    1829                 :     int         i;
    1830                 :     pendingPosition pos;
    1831             788 :     Buffer      metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
    1832                 :     Page        page;
    1833                 :     BlockNumber blkno;
    1834                 : 
    1835             788 :     *ntids = 0;
    1836                 : 
    1837                 :     /*
    1838                 :      * Acquire predicate lock on the metapage, to conflict with any fastupdate
    1839                 :      * insertions.
    1840                 :      */
    1841             788 :     PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
    1842                 : 
    1843             788 :     LockBuffer(metabuffer, GIN_SHARE);
    1844             788 :     page = BufferGetPage(metabuffer);
    1845             788 :     TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
    1846             788 :     blkno = GinPageGetMeta(page)->head;
    1847                 : 
    1848                 :     /*
    1849                 :      * fetch head of list before unlocking metapage. head page must be pinned
    1850                 :      * to prevent deletion by vacuum process
    1851                 :      */
    1852             788 :     if (blkno == InvalidBlockNumber)
    1853                 :     {
    1854                 :         /* No pending list, so proceed with normal scan */
    1855             705 :         UnlockReleaseBuffer(metabuffer);
    1856             705 :         return;
    1857                 :     }
    1858                 : 
    1859              83 :     pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
    1860              83 :     LockBuffer(pos.pendingBuffer, GIN_SHARE);
    1861              83 :     pos.firstOffset = FirstOffsetNumber;
    1862              83 :     UnlockReleaseBuffer(metabuffer);
    1863              83 :     pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
    1864                 : 
    1865                 :     /*
    1866                 :      * loop for each heap row. scanGetCandidate returns full row or row's
    1867                 :      * tuples from first page.
    1868                 :      */
    1869             807 :     while (scanGetCandidate(scan, &pos))
    1870                 :     {
    1871                 :         /*
    1872                 :          * Check entries in tuple and set up entryRes array.
    1873                 :          *
    1874                 :          * If pending tuples belonging to the current heap row are spread
    1875                 :          * across several pages, collectMatchesForHeapRow will read all of
    1876                 :          * those pages.
    1877                 :          */
    1878             724 :         if (!collectMatchesForHeapRow(scan, &pos))
    1879             447 :             continue;
    1880                 : 
    1881                 :         /*
    1882                 :          * Matching of entries of one row is finished, so check row using
    1883                 :          * consistent functions.
    1884                 :          */
    1885             277 :         oldCtx = MemoryContextSwitchTo(so->tempCtx);
    1886             277 :         recheck = false;
    1887             277 :         match = true;
    1888                 : 
    1889             702 :         for (i = 0; i < so->nkeys; i++)
    1890                 :         {
    1891             425 :             GinScanKey  key = so->keys + i;
    1892                 : 
    1893             425 :             if (!key->boolConsistentFn(key))
    1894                 :             {
    1895 UBC           0 :                 match = false;
    1896               0 :                 break;
    1897                 :             }
    1898 CBC         425 :             recheck |= key->recheckCurItem;
    1899                 :         }
    1900                 : 
    1901             277 :         MemoryContextSwitchTo(oldCtx);
    1902             277 :         MemoryContextReset(so->tempCtx);
    1903                 : 
    1904             277 :         if (match)
    1905                 :         {
    1906             277 :             tbm_add_tuples(tbm, &pos.item, 1, recheck);
    1907             277 :             (*ntids)++;
    1908                 :         }
    1909                 :     }
    1910                 : 
    1911              83 :     pfree(pos.hasMatchKey);
    1912                 : }
    1913                 : 
    1914                 : 
    1915                 : #define GinIsVoidRes(s)     ( ((GinScanOpaque) scan->opaque)->isVoidRes )
    1916                 : 
    1917                 : int64
    1918             794 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
    1919                 : {
    1920             794 :     GinScanOpaque so = (GinScanOpaque) scan->opaque;
    1921                 :     int64       ntids;
    1922                 :     ItemPointerData iptr;
    1923                 :     bool        recheck;
    1924                 : 
    1925                 :     /*
    1926                 :      * Set up the scan keys, and check for unsatisfiable query.
    1927                 :      */
    1928             794 :     ginFreeScanKeys(so);        /* there should be no keys yet, but just to be
    1929                 :                                  * sure */
    1930             794 :     ginNewScanKey(scan);
    1931                 : 
    1932             794 :     if (GinIsVoidRes(scan))
    1933               6 :         return 0;
    1934                 : 
    1935             788 :     ntids = 0;
    1936                 : 
    1937                 :     /*
    1938                 :      * First, scan the pending list and collect any matching entries into the
    1939                 :      * bitmap.  After we scan a pending item, some other backend could post it
    1940                 :      * into the main index, and so we might visit it a second time during the
    1941                 :      * main scan.  This is okay because we'll just re-set the same bit in the
    1942                 :      * bitmap.  (The possibility of duplicate visits is a major reason why GIN
    1943                 :      * can't support the amgettuple API, however.) Note that it would not do
    1944                 :      * to scan the main index before the pending list, since concurrent
    1945                 :      * cleanup could then make us miss entries entirely.
    1946                 :      */
    1947             788 :     scanPendingInsert(scan, tbm, &ntids);
    1948                 : 
    1949                 :     /*
    1950                 :      * Now scan the main index.
    1951                 :      */
    1952             788 :     startScan(scan);
    1953                 : 
    1954             788 :     ItemPointerSetMin(&iptr);
    1955                 : 
    1956                 :     for (;;)
    1957                 :     {
    1958          489836 :         CHECK_FOR_INTERRUPTS();
    1959                 : 
    1960          489836 :         if (!scanGetItem(scan, iptr, &iptr, &recheck))
    1961             788 :             break;
    1962                 : 
    1963          489048 :         if (ItemPointerIsLossyPage(&iptr))
    1964 UBC           0 :             tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
    1965                 :         else
    1966 CBC      489048 :             tbm_add_tuples(tbm, &iptr, 1, recheck);
    1967          489048 :         ntids++;
    1968                 :     }
    1969                 : 
    1970             788 :     return ntids;
    1971                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a