LCOV - differential code coverage report
Current view: top level - src/backend/access/nbtree - nbtsearch.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 87.2 % 710 619 3 20 45 23 11 375 16 217 57 378 4
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 20 20 17 3 19 1
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 84.2 % 19 16 3 16
Legend: Lines: hit not hit (240..) days: 87.3 % 691 603 20 45 23 11 375 217 57 373
Function coverage date bins:
[..60] days: 100.0 % 3 3 3
(240..) days: 47.2 % 36 17 17 19

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * nbtsearch.c
                                  4                 :  *    Search code for postgres btrees.
                                  5                 :  *
                                  6                 :  *
                                  7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  8                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/access/nbtree/nbtsearch.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : 
                                 16                 : #include "postgres.h"
                                 17                 : 
                                 18                 : #include "access/nbtree.h"
                                 19                 : #include "access/relscan.h"
                                 20                 : #include "miscadmin.h"
                                 21                 : #include "pgstat.h"
                                 22                 : #include "storage/predicate.h"
                                 23                 : #include "utils/lsyscache.h"
                                 24                 : #include "utils/rel.h"
                                 25                 : 
                                 26                 : 
                                 27                 : static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
                                 28                 : static OffsetNumber _bt_binsrch(Relation rel, BTScanInsert key, Buffer buf);
                                 29                 : static int  _bt_binsrch_posting(BTScanInsert key, Page page,
                                 30                 :                                 OffsetNumber offnum);
                                 31                 : static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
                                 32                 :                          OffsetNumber offnum);
                                 33                 : static void _bt_saveitem(BTScanOpaque so, int itemIndex,
                                 34                 :                          OffsetNumber offnum, IndexTuple itup);
                                 35                 : static int  _bt_setuppostingitems(BTScanOpaque so, int itemIndex,
                                 36                 :                                   OffsetNumber offnum, ItemPointer heapTid,
                                 37                 :                                   IndexTuple itup);
                                 38                 : static inline void _bt_savepostingitem(BTScanOpaque so, int itemIndex,
                                 39                 :                                        OffsetNumber offnum,
                                 40                 :                                        ItemPointer heapTid, int tupleOffset);
                                 41                 : static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
                                 42                 : static bool _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir);
                                 43                 : static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno,
                                 44                 :                                   ScanDirection dir);
                                 45                 : static Buffer _bt_walk_left(Relation rel, Relation heaprel, Buffer buf,
                                 46                 :                             Snapshot snapshot);
                                 47                 : static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
                                 48                 : static inline void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir);
                                 49                 : 
                                 50                 : 
                                 51                 : /*
                                 52                 :  *  _bt_drop_lock_and_maybe_pin()
                                 53                 :  *
                                 54                 :  * Unlock the buffer; and if it is safe to release the pin, do that, too.
                                 55                 :  * This will prevent vacuum from stalling in a blocked state trying to read a
                                 56                 :  * page when a cursor is sitting on it.
                                 57                 :  *
                                 58                 :  * See nbtree/README section on making concurrent TID recycling safe.
                                 59                 :  */
                                 60                 : static void
 2937 kgrittn                    61 GIC     5022230 : _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
 2937 kgrittn                    62 ECB             : {
  992 pg                         63 GIC     5022230 :     _bt_unlockbuf(scan->indexRelation, sp->buf);
 2937 kgrittn                    64 ECB             : 
 2937 kgrittn                    65 GIC     5022230 :     if (IsMVCCSnapshot(scan->xs_snapshot) &&
 2937 kgrittn                    66 CBC     4849383 :         RelationNeedsWAL(scan->indexRelation) &&
                                 67         4846854 :         !scan->xs_want_itup)
 2937 kgrittn                    68 ECB             :     {
 2937 kgrittn                    69 GIC     4797883 :         ReleaseBuffer(sp->buf);
 2937 kgrittn                    70 CBC     4797883 :         sp->buf = InvalidBuffer;
 2937 kgrittn                    71 ECB             :     }
 2937 kgrittn                    72 GIC     5022230 : }
 9770 scrappy                    73 ECB             : 
                                 74                 : /*
                                 75                 :  *  _bt_search() -- Search the tree for a particular scankey,
                                 76                 :  *      or more precisely for the first leaf page it could be on.
                                 77                 :  *
                                 78                 :  * The passed scankey is an insertion-type scankey (see nbtree/README),
                                 79                 :  * but it can omit the rightmost column(s) of the index.
                                 80                 :  *
                                 81                 :  * Return value is a stack of parent-page pointers (i.e. there is no entry for
                                 82                 :  * the leaf level/page).  *bufP is set to the address of the leaf-page buffer,
                                 83                 :  * which is locked and pinned.  No locks are held on the parent pages,
                                 84                 :  * however!
                                 85                 :  *
                                 86                 :  * If the snapshot parameter is not NULL, "old snapshot" checking will take
                                 87                 :  * place during the descent through the tree.  This is not needed when
                                 88                 :  * positioning for an insert or delete, so NULL is used for those cases.
                                 89                 :  *
                                 90                 :  * The returned buffer is locked according to access parameter.  Additionally,
                                 91                 :  * access = BT_WRITE will allow an empty root page to be created and returned.
                                 92                 :  * When access = BT_READ, an empty index will result in *bufP being set to
                                 93                 :  * InvalidBuffer.  Also, in BT_WRITE mode, any incomplete splits encountered
                                 94                 :  * during the search will be finished.
                                 95                 :  */
                                 96                 : BTStack
    8 andres                     97 GNC    16390240 : _bt_search(Relation rel, Relation heaprel, BTScanInsert key, Buffer *bufP,
                                 98                 :            int access, Snapshot snapshot)
                                 99                 : {
 8053 bruce                     100 GIC    16390240 :     BTStack     stack_in = NULL;
 1716 akorotkov                 101 CBC    16390240 :     int         page_access = BT_READ;
 9345 bruce                     102 ECB             : 
                                103                 :     /* Get the root page to start with */
    8 andres                    104 GNC    16390240 :     *bufP = _bt_getroot(rel, heaprel, access);
 9345 bruce                     105 ECB             : 
                                106                 :     /* If index is empty and access = BT_READ, no root page is created. */
 8053 bruce                     107 GIC    16390240 :     if (!BufferIsValid(*bufP))
 8297 tgl                       108 CBC      385754 :         return (BTStack) NULL;
 9345 bruce                     109 ECB             : 
                                110                 :     /* Loop iterates once per level descended in the tree */
                                111                 :     for (;;)
 8297 tgl                       112 GIC    12274663 :     {
 8297 tgl                       113 ECB             :         Page        page;
                                114                 :         BTPageOpaque opaque;
                                115                 :         OffsetNumber offnum;
                                116                 :         ItemId      itemid;
                                117                 :         IndexTuple  itup;
                                118                 :         BlockNumber child;
                                119                 :         BTStack     new_stack;
                                120                 : 
                                121                 :         /*
                                122                 :          * Race -- the page we just grabbed may have split since we read its
                                123                 :          * downlink in its parent page (or the metapage).  If it has, we may
                                124                 :          * need to move right to its new sibling.  Do that.
                                125                 :          *
                                126                 :          * In write-mode, allow _bt_moveright to finish any incomplete splits
                                127                 :          * along the way.  Strictly speaking, we'd only need to finish an
                                128                 :          * incomplete split on the leaf page we're about to insert to, not on
                                129                 :          * any of the upper levels (internal pages with incomplete splits are
                                130                 :          * also taken care of in _bt_getstackbuf).  But this is a good
                                131                 :          * opportunity to finish splits of internal pages too.
                                132                 :          */
    8 andres                    133 GNC    28279149 :         *bufP = _bt_moveright(rel, heaprel, key, *bufP, (access == BT_WRITE),
                                134                 :                               stack_in, page_access, snapshot);
                                135                 : 
                                136                 :         /* if this is a leaf page, we're done */
 2545 kgrittn                   137 GIC    28279149 :         page = BufferGetPage(*bufP);
  373 michael                   138 CBC    28279149 :         opaque = BTPageGetOpaque(page);
 8297 tgl                       139        28279149 :         if (P_ISLEAF(opaque))
                                140        16004486 :             break;
 9345 bruce                     141 ECB             : 
                                142                 :         /*
                                143                 :          * Find the appropriate pivot tuple on this page.  Its downlink points
                                144                 :          * to the child page that we're about to descend to.
                                145                 :          */
 1481 pg                        146 GIC    12274663 :         offnum = _bt_binsrch(rel, key, *bufP);
 8297 tgl                       147 CBC    12274663 :         itemid = PageGetItemId(page, offnum);
 6283                           148        12274663 :         itup = (IndexTuple) PageGetItem(page, itemid);
 1138 pg                        149        12274663 :         Assert(BTreeTupleIsPivot(itup) || !key->heapkeyspace);
 1011                           150        12274663 :         child = BTreeTupleGetDownLink(itup);
 9345 bruce                     151 ECB             : 
                                152                 :         /*
                                153                 :          * We need to save the location of the pivot tuple we chose in a new
                                154                 :          * stack entry for this page/level.  If caller ends up splitting a
                                155                 :          * page one level down, it usually ends up inserting a new pivot
                                156                 :          * tuple/downlink immediately after the location recorded here.
                                157                 :          */
 8297 tgl                       158 GIC    12274663 :         new_stack = (BTStack) palloc(sizeof(BTStackData));
 1011 pg                        159 CBC    12274663 :         new_stack->bts_blkno = BufferGetBlockNumber(*bufP);
 8297 tgl                       160        12274663 :         new_stack->bts_offset = offnum;
                                161        12274663 :         new_stack->bts_parent = stack_in;
 9345 bruce                     162 ECB             : 
                                163                 :         /*
                                164                 :          * Page level 1 is lowest non-leaf page level prior to leaves.  So, if
                                165                 :          * we're on the level 1 and asked to lock leaf page in write mode,
                                166                 :          * then lock next page in write mode, because it must be a leaf.
                                167                 :          */
  774 pg                        168 GIC    12274663 :         if (opaque->btpo_level == 1 && access == BT_WRITE)
 1716 akorotkov                 169 CBC     5665181 :             page_access = BT_WRITE;
 1716 akorotkov                 170 ECB             : 
                                171                 :         /* drop the read lock on the page, then acquire one on its child */
 1011 pg                        172 GIC    12274663 :         *bufP = _bt_relandgetbuf(rel, *bufP, child, page_access);
 9345 bruce                     173 ECB             : 
                                174                 :         /* okay, all set to move down a level */
 8297 tgl                       175 GIC    12274663 :         stack_in = new_stack;
 8297 tgl                       176 ECB             :     }
                                177                 : 
                                178                 :     /*
                                179                 :      * If we're asked to lock leaf in write mode, but didn't manage to, then
                                180                 :      * relock.  This should only happen when the root page is a leaf page (and
                                181                 :      * the only page in the index other than the metapage).
                                182                 :      */
 1716 akorotkov                 183 GIC    16004486 :     if (access == BT_WRITE && page_access == BT_READ)
 1716 akorotkov                 184 ECB             :     {
                                185                 :         /* trade in our read lock for a write lock */
  992 pg                        186 GIC     1468243 :         _bt_unlockbuf(rel, *bufP);
  992 pg                        187 CBC     1468243 :         _bt_lockbuf(rel, *bufP, BT_WRITE);
 1716 akorotkov                 188 ECB             : 
                                189                 :         /*
                                190                 :          * Race -- the leaf page may have split after we dropped the read lock
                                191                 :          * but before we acquired a write lock.  If it has, we may need to
                                192                 :          * move right to its new sibling.  Do that.
                                193                 :          */
    8 andres                    194 GNC     1468243 :         *bufP = _bt_moveright(rel, heaprel, key, *bufP, true, stack_in, BT_WRITE,
 1481 pg                        195 ECB             :                               snapshot);
                                196                 :     }
                                197                 : 
 8297 tgl                       198 GIC    16004486 :     return stack_in;
 9770 scrappy                   199 ECB             : }
                                200                 : 
                                201                 : /*
                                202                 :  *  _bt_moveright() -- move right in the btree if necessary.
                                203                 :  *
                                204                 :  * When we follow a pointer to reach a page, it is possible that
                                205                 :  * the page has changed in the meanwhile.  If this happens, we're
                                206                 :  * guaranteed that the page has "split right" -- that is, that any
                                207                 :  * data that appeared on the page originally is either on the page
                                208                 :  * or strictly to the right of it.
                                209                 :  *
                                210                 :  * This routine decides whether or not we need to move right in the
                                211                 :  * tree by examining the high key entry on the page.  If that entry is
                                212                 :  * strictly less than the scankey, or <= the scankey in the
                                213                 :  * key.nextkey=true case, then we followed the wrong link and we need
                                214                 :  * to move right.
                                215                 :  *
                                216                 :  * The passed insertion-type scankey can omit the rightmost column(s) of the
                                217                 :  * index. (see nbtree/README)
                                218                 :  *
                                219                 :  * When key.nextkey is false (the usual case), we are looking for the first
                                220                 :  * item >= key.  When key.nextkey is true, we are looking for the first item
                                221                 :  * strictly greater than key.
                                222                 :  *
                                223                 :  * If forupdate is true, we will attempt to finish any incomplete splits
                                224                 :  * that we encounter.  This is required when locking a target page for an
                                225                 :  * insertion, because we don't allow inserting on a page before the split
                                226                 :  * is completed.  'stack' is only used if forupdate is true.
                                227                 :  *
                                228                 :  * On entry, we have the buffer pinned and a lock of the type specified by
                                229                 :  * 'access'.  If we move right, we release the buffer and lock and acquire
                                230                 :  * the same on the right sibling.  Return value is the buffer we stop at.
                                231                 :  *
                                232                 :  * If the snapshot parameter is not NULL, "old snapshot" checking will take
                                233                 :  * place during the descent through the tree.  This is not needed when
                                234                 :  * positioning for an insert or delete, so NULL is used for those cases.
                                235                 :  */
                                236                 : Buffer
 9770 scrappy                   237 GIC    29747392 : _bt_moveright(Relation rel,
                                238                 :               Relation heaprel,
 1481 pg                        239 ECB             :               BTScanInsert key,
                                240                 :               Buffer buf,
                                241                 :               bool forupdate,
                                242                 :               BTStack stack,
                                243                 :               int access,
                                244                 :               Snapshot snapshot)
                                245                 : {
                                246                 :     Page        page;
                                247                 :     BTPageOpaque opaque;
                                248                 :     int32       cmpval;
                                249                 : 
                                250                 :     /*
                                251                 :      * When nextkey = false (normal case): if the scan key that brought us to
                                252                 :      * this page is > the high key stored on the page, then the page has split
                                253                 :      * and we need to move right.  (pg_upgrade'd !heapkeyspace indexes could
                                254                 :      * have some duplicates to the right as well as the left, but that's
                                255                 :      * something that's only ever dealt with on the leaf level, after
                                256                 :      * _bt_search has found an initial leaf page.)
                                257                 :      *
                                258                 :      * When nextkey = true: move right if the scan key is >= page's high key.
                                259                 :      * (Note that key.scantid cannot be set in this case.)
                                260                 :      *
                                261                 :      * The page could even have split more than once, so scan as far as
                                262                 :      * needed.
                                263                 :      *
                                264                 :      * We also have to move right if we followed a link that brought us to a
                                265                 :      * dead page.
                                266                 :      */
 1481 pg                        267 GIC    29747392 :     cmpval = key->nextkey ? 0 : 1;
                                268                 : 
 3309 heikki.linnakangas        269 ECB             :     for (;;)
                                270                 :     {
 2545 kgrittn                   271 GIC    29748068 :         page = BufferGetPage(buf);
                                272        29748068 :         TestForOldSnapshot(snapshot, rel, page);
  373 michael                   273 CBC    29748068 :         opaque = BTPageGetOpaque(page);
 3309 heikki.linnakangas        274 ECB             : 
 3309 heikki.linnakangas        275 CBC    29748068 :         if (P_RIGHTMOST(opaque))
 3309 heikki.linnakangas        276 GIC    23650629 :             break;
 3309 heikki.linnakangas        277 ECB             : 
                                278                 :         /*
                                279                 :          * Finish any incomplete splits we encounter along the way.
                                280                 :          */
 3309 heikki.linnakangas        281 GIC     6097439 :         if (forupdate && P_INCOMPLETE_SPLIT(opaque))
 3309 heikki.linnakangas        282 UIC           0 :         {
 3309 heikki.linnakangas        283 LBC           0 :             BlockNumber blkno = BufferGetBlockNumber(buf);
 3309 heikki.linnakangas        284 EUB             : 
                                285                 :             /* upgrade our lock if necessary */
 3309 heikki.linnakangas        286 UIC           0 :             if (access == BT_READ)
                                287                 :             {
  992 pg                        288 UBC           0 :                 _bt_unlockbuf(rel, buf);
  992 pg                        289 UIC           0 :                 _bt_lockbuf(rel, buf, BT_WRITE);
 3309 heikki.linnakangas        290 EUB             :             }
                                291                 : 
 3309 heikki.linnakangas        292 UIC           0 :             if (P_INCOMPLETE_SPLIT(opaque))
    8 andres                    293 UNC           0 :                 _bt_finish_split(rel, heaprel, buf, stack);
 3309 heikki.linnakangas        294 EUB             :             else
 3309 heikki.linnakangas        295 UBC           0 :                 _bt_relbuf(rel, buf);
                                296                 : 
 3309 heikki.linnakangas        297 EUB             :             /* re-acquire the lock in the right mode, and re-check */
    8 andres                    298 UNC           0 :             buf = _bt_getbuf(rel, heaprel, blkno, access);
 3309 heikki.linnakangas        299 UIC           0 :             continue;
 3309 heikki.linnakangas        300 EUB             :         }
                                301                 : 
 1481 pg                        302 GIC     6097439 :         if (P_IGNORE(opaque) || _bt_compare(rel, key, page, P_HIKEY) >= cmpval)
                                303                 :         {
 3309 heikki.linnakangas        304 ECB             :             /* step right one page */
 3309 heikki.linnakangas        305 GIC         676 :             buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access);
                                306             676 :             continue;
 3309 heikki.linnakangas        307 ECB             :         }
                                308                 :         else
                                309                 :             break;
                                310                 :     }
                                311                 : 
 7351 tgl                       312 GIC    29747392 :     if (P_IGNORE(opaque))
 5578 tgl                       313 UIC           0 :         elog(ERROR, "fell off the end of index \"%s\"",
 7351 tgl                       314 ECB             :              RelationGetRelationName(rel));
 7351 tgl                       315 EUB             : 
 8986 bruce                     316 GIC    29747392 :     return buf;
                                317                 : }
 9770 scrappy                   318 ECB             : 
                                319                 : /*
                                320                 :  *  _bt_binsrch() -- Do a binary search for a key on a particular page.
                                321                 :  *
                                322                 :  * On a leaf page, _bt_binsrch() returns the OffsetNumber of the first
                                323                 :  * key >= given scankey, or > scankey if nextkey is true.  (NOTE: in
                                324                 :  * particular, this means it is possible to return a value 1 greater than the
                                325                 :  * number of keys on the page, if the scankey is > all keys on the page.)
                                326                 :  *
                                327                 :  * On an internal (non-leaf) page, _bt_binsrch() returns the OffsetNumber
                                328                 :  * of the last key < given scankey, or last key <= given scankey if nextkey
                                329                 :  * is true.  (Since _bt_compare treats the first data key of such a page as
                                330                 :  * minus infinity, there will be at least one key < scankey, so the result
                                331                 :  * always points at one of the keys on the page.)  This key indicates the
                                332                 :  * right place to descend to be sure we find all leaf keys >= given scankey
                                333                 :  * (or leaf keys > given scankey when nextkey is true).
                                334                 :  *
                                335                 :  * This procedure is not responsible for walking right, it just examines
                                336                 :  * the given page.  _bt_binsrch() has no lock or refcount side effects
                                337                 :  * on the buffer.
                                338                 :  */
                                339                 : static OffsetNumber
 9770 scrappy                   340 GIC    20943015 : _bt_binsrch(Relation rel,
                                341                 :             BTScanInsert key,
 1481 pg                        342 ECB             :             Buffer buf)
                                343                 : {
                                344                 :     Page        page;
                                345                 :     BTPageOpaque opaque;
                                346                 :     OffsetNumber low,
                                347                 :                 high;
                                348                 :     int32       result,
                                349                 :                 cmpval;
                                350                 : 
 2545 kgrittn                   351 GIC    20943015 :     page = BufferGetPage(buf);
  373 michael                   352        20943015 :     opaque = BTPageGetOpaque(page);
 9345 bruce                     353 ECB             : 
 1308 pg                        354                 :     /* Requesting nextkey semantics while using scantid seems nonsensical */
 1308 pg                        355 GIC    20943015 :     Assert(!key->nextkey || key->scantid == NULL);
                                356                 :     /* scantid-set callers must use _bt_binsrch_insert() on leaf pages */
 1308 pg                        357 CBC    20943015 :     Assert(!P_ISLEAF(opaque) || key->scantid == NULL);
                                358                 : 
 8297 tgl                       359        20943015 :     low = P_FIRSTDATAKEY(opaque);
 9345 bruce                     360 GIC    20943015 :     high = PageGetMaxOffsetNumber(page);
 9345 bruce                     361 ECB             : 
                                362                 :     /*
                                363                 :      * If there are no keys on the page, return the first available slot. Note
                                364                 :      * this covers two cases: the page is really empty (no keys), or it
                                365                 :      * contains only a high key.  The latter case is possible after vacuuming.
                                366                 :      * This can never happen on an internal page, however, since they are
                                367                 :      * never empty (an internal page must have children).
                                368                 :      */
 1481 pg                        369 GIC    20943015 :     if (unlikely(high < low))
 8986 bruce                     370            2170 :         return low;
 9345 bruce                     371 ECB             : 
 8668 tgl                       372                 :     /*
                                373                 :      * Binary search to find the first key on the page >= scan key, or first
                                374                 :      * key > scankey when nextkey is true.
                                375                 :      *
                                376                 :      * For nextkey=false (cmpval=1), the loop invariant is: all slots before
                                377                 :      * 'low' are < scan key, all slots at or after 'high' are >= scan key.
                                378                 :      *
                                379                 :      * For nextkey=true (cmpval=0), the loop invariant is: all slots before
                                380                 :      * 'low' are <= scan key, all slots at or after 'high' are > scan key.
                                381                 :      *
                                382                 :      * We can fall out when high == low.
                                383                 :      */
 8668 tgl                       384 GIC    20940845 :     high++;                     /* establish the loop invariant for high */
                                385                 : 
 1481 pg                        386 CBC    20940845 :     cmpval = key->nextkey ? 0 : 1;   /* select comparison value */
                                387                 : 
 8668 tgl                       388       127472718 :     while (high > low)
                                389                 :     {
 8397 bruce                     390       106531873 :         OffsetNumber mid = low + ((high - low) / 2);
                                391                 : 
 8668 tgl                       392 ECB             :         /* We have low <= mid < high, so mid points at a real slot */
                                393                 : 
 1481 pg                        394 GIC   106531873 :         result = _bt_compare(rel, key, page, mid);
                                395                 : 
 7049 tgl                       396 CBC   106531873 :         if (result >= cmpval)
 8668 tgl                       397 GIC    70298342 :             low = mid + 1;
 9345 bruce                     398 ECB             :         else
 8668 tgl                       399 CBC    36233531 :             high = mid;
                                400                 :     }
 9345 bruce                     401 ECB             : 
                                402                 :     /*
                                403                 :      * At this point we have high == low, but be careful: they could point
                                404                 :      * past the last slot on the page.
                                405                 :      *
                                406                 :      * On a leaf page, we always return the first key >= scan key (resp. >
                                407                 :      * scan key), which could be the last slot + 1.
                                408                 :      */
 8297 tgl                       409 GIC    20940845 :     if (P_ISLEAF(opaque))
 8668                           410         8666182 :         return low;
 9345 bruce                     411 ECB             : 
 8053                           412                 :     /*
                                413                 :      * On a non-leaf page, return the last key < scan key (resp. <= scan key).
                                414                 :      * There must be one if _bt_compare() is playing by the rules.
                                415                 :      */
 8297 tgl                       416 GIC    12274663 :     Assert(low > P_FIRSTDATAKEY(opaque));
                                417                 : 
 8668 tgl                       418 CBC    12274663 :     return OffsetNumberPrev(low);
                                419                 : }
 9770 scrappy                   420 ECB             : 
                                421                 : /*
                                422                 :  *
                                423                 :  *  _bt_binsrch_insert() -- Cacheable, incremental leaf page binary search.
                                424                 :  *
                                425                 :  * Like _bt_binsrch(), but with support for caching the binary search
                                426                 :  * bounds.  Only used during insertion, and only on the leaf page that it
                                427                 :  * looks like caller will insert tuple on.  Exclusive-locked and pinned
                                428                 :  * leaf page is contained within insertstate.
                                429                 :  *
                                430                 :  * Caches the bounds fields in insertstate so that a subsequent call can
                                431                 :  * reuse the low and strict high bounds of original binary search.  Callers
                                432                 :  * that use these fields directly must be prepared for the case where low
                                433                 :  * and/or stricthigh are not on the same page (one or both exceed maxoff
                                434                 :  * for the page).  The case where there are no items on the page (high <
                                435                 :  * low) makes bounds invalid.
                                436                 :  *
                                437                 :  * Caller is responsible for invalidating bounds when it modifies the page
                                438                 :  * before calling here a second time, and for dealing with posting list
                                439                 :  * tuple matches (callers can use insertstate's postingoff field to
                                440                 :  * determine which existing heap TID will need to be replaced by a posting
                                441                 :  * list split).
                                442                 :  */
                                443                 : OffsetNumber
 1481 pg                        444 GIC    12616278 : _bt_binsrch_insert(Relation rel, BTInsertState insertstate)
                                445                 : {
 1481 pg                        446 CBC    12616278 :     BTScanInsert key = insertstate->itup_key;
                                447                 :     Page        page;
 1481 pg                        448 ECB             :     BTPageOpaque opaque;
                                449                 :     OffsetNumber low,
                                450                 :                 high,
                                451                 :                 stricthigh;
                                452                 :     int32       result,
                                453                 :                 cmpval;
                                454                 : 
 1481 pg                        455 GIC    12616278 :     page = BufferGetPage(insertstate->buf);
  373 michael                   456        12616278 :     opaque = BTPageGetOpaque(page);
 1481 pg                        457 ECB             : 
 1481 pg                        458 CBC    12616278 :     Assert(P_ISLEAF(opaque));
 1481 pg                        459 GIC    12616278 :     Assert(!key->nextkey);
 1138 pg                        460 CBC    12616278 :     Assert(insertstate->postingoff == 0);
 1481 pg                        461 ECB             : 
 1481 pg                        462 CBC    12616278 :     if (!insertstate->bounds_valid)
                                463                 :     {
 1481 pg                        464 ECB             :         /* Start new binary search */
 1481 pg                        465 GIC     7384948 :         low = P_FIRSTDATAKEY(opaque);
                                466         7384948 :         high = PageGetMaxOffsetNumber(page);
 1481 pg                        467 ECB             :     }
                                468                 :     else
                                469                 :     {
                                470                 :         /* Restore result of previous binary search against same page */
 1481 pg                        471 GIC     5231330 :         low = insertstate->low;
                                472         5231330 :         high = insertstate->stricthigh;
 1481 pg                        473 ECB             :     }
                                474                 : 
                                475                 :     /* If there are no keys on the page, return the first available slot */
 1481 pg                        476 GIC    12616278 :     if (unlikely(high < low))
                                477                 :     {
 1481 pg                        478 ECB             :         /* Caller can't reuse bounds */
 1481 pg                        479 GIC       19103 :         insertstate->low = InvalidOffsetNumber;
                                480           19103 :         insertstate->stricthigh = InvalidOffsetNumber;
 1481 pg                        481 CBC       19103 :         insertstate->bounds_valid = false;
                                482           19103 :         return low;
 1481 pg                        483 ECB             :     }
                                484                 : 
                                485                 :     /*
                                486                 :      * Binary search to find the first key on the page >= scan key. (nextkey
                                487                 :      * is always false when inserting).
                                488                 :      *
                                489                 :      * The loop invariant is: all slots before 'low' are < scan key, all slots
                                490                 :      * at or after 'high' are >= scan key.  'stricthigh' is > scan key, and is
                                491                 :      * maintained to save additional search effort for caller.
                                492                 :      *
                                493                 :      * We can fall out when high == low.
                                494                 :      */
 1481 pg                        495 GIC    12597175 :     if (!insertstate->bounds_valid)
                                496         7365845 :         high++;                 /* establish the loop invariant for high */
 1481 pg                        497 CBC    12597175 :     stricthigh = high;          /* high initially strictly higher */
 1481 pg                        498 ECB             : 
 1481 pg                        499 CBC    12597175 :     cmpval = 1;                 /* !nextkey comparison value */
                                500                 : 
                                501        65283644 :     while (high > low)
                                502                 :     {
                                503        52686469 :         OffsetNumber mid = low + ((high - low) / 2);
                                504                 : 
 1481 pg                        505 ECB             :         /* We have low <= mid < high, so mid points at a real slot */
                                506                 : 
 1481 pg                        507 GIC    52686469 :         result = _bt_compare(rel, key, page, mid);
                                508                 : 
 1481 pg                        509 CBC    52686469 :         if (result >= cmpval)
 1481 pg                        510 GIC    40616979 :             low = mid + 1;
 1481 pg                        511 ECB             :         else
                                512                 :         {
 1481 pg                        513 GIC    12069490 :             high = mid;
                                514        12069490 :             if (result != 0)
 1481 pg                        515 CBC    11435404 :                 stricthigh = high;
 1481 pg                        516 ECB             :         }
 1138                           517                 : 
                                518                 :         /*
                                519                 :          * If tuple at offset located by binary search is a posting list whose
                                520                 :          * TID range overlaps with caller's scantid, perform posting list
                                521                 :          * binary search to set postingoff for caller.  Caller must split the
                                522                 :          * posting list when postingoff is set.  This should happen
                                523                 :          * infrequently.
                                524                 :          */
 1138 pg                        525 GIC    52686469 :         if (unlikely(result == 0 && key->scantid != NULL))
                                526                 :         {
  529 pg                        527 ECB             :             /*
                                528                 :              * postingoff should never be set more than once per leaf page
                                529                 :              * binary search.  That would mean that there are duplicate table
                                530                 :              * TIDs in the index, which is never okay.  Check for that here.
                                531                 :              */
  529 pg                        532 GIC      207807 :             if (insertstate->postingoff != 0)
  529 pg                        533 UIC           0 :                 ereport(ERROR,
  529 pg                        534 ECB             :                         (errcode(ERRCODE_INDEX_CORRUPTED),
  529 pg                        535 EUB             :                          errmsg_internal("table tid from new index tuple (%u,%u) cannot find insert offset between offsets %u and %u of block %u in index \"%s\"",
                                536                 :                                          ItemPointerGetBlockNumber(key->scantid),
                                537                 :                                          ItemPointerGetOffsetNumber(key->scantid),
                                538                 :                                          low, stricthigh,
                                539                 :                                          BufferGetBlockNumber(insertstate->buf),
                                540                 :                                          RelationGetRelationName(rel))));
                                541                 : 
 1138 pg                        542 GIC      207807 :             insertstate->postingoff = _bt_binsrch_posting(key, page, mid);
                                543                 :         }
 1481 pg                        544 ECB             :     }
                                545                 : 
                                546                 :     /*
                                547                 :      * On a leaf page, a binary search always returns the first key >= scan
                                548                 :      * key (at least in !nextkey case), which could be the last slot + 1. This
                                549                 :      * is also the lower bound of cached search.
                                550                 :      *
                                551                 :      * stricthigh may also be the last slot + 1, which prevents caller from
                                552                 :      * using bounds directly, but is still useful to us if we're called a
                                553                 :      * second time with cached bounds (cached low will be < stricthigh when
                                554                 :      * that happens).
                                555                 :      */
 1481 pg                        556 GIC    12597175 :     insertstate->low = low;
                                557        12597175 :     insertstate->stricthigh = stricthigh;
 1481 pg                        558 CBC    12597175 :     insertstate->bounds_valid = true;
 1481 pg                        559 ECB             : 
 1481 pg                        560 CBC    12597175 :     return low;
                                561                 : }
 1481 pg                        562 ECB             : 
                                563                 : /*----------
                                564                 :  *  _bt_binsrch_posting() -- posting list binary search.
                                565                 :  *
                                566                 :  * Helper routine for _bt_binsrch_insert().
                                567                 :  *
                                568                 :  * Returns offset into posting list where caller's scantid belongs.
                                569                 :  *----------
                                570                 :  */
                                571                 : static int
 1138 pg                        572 GIC      207807 : _bt_binsrch_posting(BTScanInsert key, Page page, OffsetNumber offnum)
                                573                 : {
 1138 pg                        574 ECB             :     IndexTuple  itup;
                                575                 :     ItemId      itemid;
                                576                 :     int         low,
                                577                 :                 high,
                                578                 :                 mid,
                                579                 :                 res;
                                580                 : 
                                581                 :     /*
                                582                 :      * If this isn't a posting tuple, then the index must be corrupt (if it is
                                583                 :      * an ordinary non-pivot tuple then there must be an existing tuple with a
                                584                 :      * heap TID that equals inserter's new heap TID/scantid).  Defensively
                                585                 :      * check that tuple is a posting list tuple whose posting list range
                                586                 :      * includes caller's scantid.
                                587                 :      *
                                588                 :      * (This is also needed because contrib/amcheck's rootdescend option needs
                                589                 :      * to be able to relocate a non-pivot tuple using _bt_binsrch_insert().)
                                590                 :      */
 1138 pg                        591 GIC      207807 :     itemid = PageGetItemId(page, offnum);
                                592          207807 :     itup = (IndexTuple) PageGetItem(page, itemid);
 1138 pg                        593 CBC      207807 :     if (!BTreeTupleIsPosting(itup))
                                594          200041 :         return 0;
 1138 pg                        595 ECB             : 
 1138 pg                        596 CBC        7766 :     Assert(key->heapkeyspace && key->allequalimage);
                                597                 : 
 1138 pg                        598 ECB             :     /*
                                599                 :      * In the event that posting list tuple has LP_DEAD bit set, indicate this
                                600                 :      * to _bt_binsrch_insert() caller by returning -1, a sentinel value.  A
                                601                 :      * second call to _bt_binsrch_insert() can take place when its caller has
                                602                 :      * removed the dead item.
                                603                 :      */
 1138 pg                        604 GIC        7766 :     if (ItemIdIsDead(itemid))
 1138 pg                        605 UIC           0 :         return -1;
 1138 pg                        606 ECB             : 
 1138 pg                        607 EUB             :     /* "high" is past end of posting list for loop invariant */
 1138 pg                        608 GIC        7766 :     low = 0;
                                609            7766 :     high = BTreeTupleGetNPosting(itup);
 1138 pg                        610 CBC        7766 :     Assert(high >= 2);
 1138 pg                        611 ECB             : 
 1138 pg                        612 CBC       62476 :     while (high > low)
                                613                 :     {
                                614           54710 :         mid = low + ((high - low) / 2);
 1138 pg                        615 GIC       54710 :         res = ItemPointerCompare(key->scantid,
 1138 pg                        616 ECB             :                                  BTreeTupleGetPostingN(itup, mid));
                                617                 : 
 1138 pg                        618 GIC       54710 :         if (res > 0)
                                619           28848 :             low = mid + 1;
 1138 pg                        620 CBC       25862 :         else if (res < 0)
                                621           25862 :             high = mid;
 1138 pg                        622 ECB             :         else
 1138 pg                        623 LBC           0 :             return mid;
                                624                 :     }
 1138 pg                        625 EUB             : 
                                626                 :     /* Exact match not found */
 1138 pg                        627 GIC        7766 :     return low;
                                628                 : }
 1138 pg                        629 ECB             : 
                                630                 : /*----------
                                631                 :  *  _bt_compare() -- Compare insertion-type scankey to tuple on a page.
                                632                 :  *
                                633                 :  *  page/offnum: location of btree item to be compared to.
                                634                 :  *
                                635                 :  *      This routine returns:
                                636                 :  *          <0 if scankey < tuple at offnum;
                                637                 :  *           0 if scankey == tuple at offnum;
                                638                 :  *          >0 if scankey > tuple at offnum.
                                639                 :  *
                                640                 :  * NULLs in the keys are treated as sortable values.  Therefore
                                641                 :  * "equality" does not necessarily mean that the item should be returned
                                642                 :  * to the caller as a matching key.  Similarly, an insertion scankey
                                643                 :  * with its scantid set is treated as equal to a posting tuple whose TID
                                644                 :  * range overlaps with their scantid.  There generally won't be a
                                645                 :  * matching TID in the posting tuple, which caller must handle
                                646                 :  * themselves (e.g., by splitting the posting list tuple).
                                647                 :  *
                                648                 :  * CRUCIAL NOTE: on a non-leaf page, the first data key is assumed to be
                                649                 :  * "minus infinity": this routine will always claim it is less than the
                                650                 :  * scankey.  The actual key value stored is explicitly truncated to 0
                                651                 :  * attributes (explicitly minus infinity) with version 3+ indexes, but
                                652                 :  * that isn't relied upon.  This allows us to implement the Lehman and
                                653                 :  * Yao convention that the first down-link pointer is before the first
                                654                 :  * key.  See backend/access/nbtree/README for details.
                                655                 :  *----------
                                656                 :  */
                                657                 : int32
 9770 scrappy                   658 GIC   177440905 : _bt_compare(Relation rel,
                                659                 :             BTScanInsert key,
 8297 tgl                       660 ECB             :             Page page,
                                661                 :             OffsetNumber offnum)
                                662                 : {
 8297 tgl                       663 GIC   177440905 :     TupleDesc   itupdesc = RelationGetDescr(rel);
  373 michael                   664       177440905 :     BTPageOpaque opaque = BTPageGetOpaque(page);
 9344 bruce                     665 ECB             :     IndexTuple  itup;
 1481 pg                        666                 :     ItemPointer heapTid;
                                667                 :     ScanKey     scankey;
                                668                 :     int         ncmpkey;
                                669                 :     int         ntupatts;
                                670                 :     int32       result;
                                671                 : 
 1481 pg                        672 GIC   177440905 :     Assert(_bt_check_natts(rel, key->heapkeyspace, page, offnum));
                                673       177440905 :     Assert(key->keysz <= IndexRelationGetNumberOfKeyAttributes(rel));
 1481 pg                        674 CBC   177440905 :     Assert(key->heapkeyspace || key->scantid == NULL);
 1828 teodor                    675 ECB             : 
 9770 scrappy                   676                 :     /*
                                677                 :      * Force result ">" if target item is first data item on an internal page
                                678                 :      * --- see NOTE above.
                                679                 :      */
 8053 bruce                     680 GIC   177440905 :     if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque))
 8986                           681         1889977 :         return 1;
 9345 bruce                     682 ECB             : 
 6283 tgl                       683 CBC   175550928 :     itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
 1481 pg                        684 GIC   175550928 :     ntupatts = BTreeTupleGetNAtts(itup, rel);
 9345 bruce                     685 ECB             : 
 9770 scrappy                   686                 :     /*
                                687                 :      * The scan key is set up with the attribute number associated with each
                                688                 :      * term in the key.  It is important that, if the index is multi-key, the
                                689                 :      * scan contain the first k key attributes, and that they be in order.  If
                                690                 :      * you think about how multi-key ordering works, you'll understand why
                                691                 :      * this is.
                                692                 :      *
                                693                 :      * We don't test for violation of this condition here, however.  The
                                694                 :      * initial setup for the index scan had better have gotten it right (see
                                695                 :      * _bt_first).
                                696                 :      */
                                697                 : 
 1481 pg                        698 GIC   175550928 :     ncmpkey = Min(ntupatts, key->keysz);
                                699       175550928 :     Assert(key->heapkeyspace || ncmpkey == key->keysz);
 1138 pg                        700 CBC   175550928 :     Assert(!BTreeTupleIsPosting(itup) || key->allequalimage);
 1481                           701       175550928 :     scankey = key->scankeys;
                                702       220720244 :     for (int i = 1; i <= ncmpkey; i++)
 9512 vadim4o                   703 ECB             :     {
 8297 tgl                       704                 :         Datum       datum;
                                705                 :         bool        isNull;
                                706                 : 
 7088 tgl                       707 GIC   207864745 :         datum = index_getattr(itup, scankey->sk_attno, itupdesc, &isNull);
                                708                 : 
 2118 tgl                       709 CBC   207864745 :         if (scankey->sk_flags & SK_ISNULL)   /* key is NULL */
                                710                 :         {
 8297                           711          170918 :             if (isNull)
 8349 tgl                       712 GIC          83 :                 result = 0;     /* NULL "=" NULL */
 5934 tgl                       713 CBC      170835 :             else if (scankey->sk_flags & SK_BT_NULLS_FIRST)
                                714             132 :                 result = -1;    /* NULL "<" NOT_NULL */
 9345 bruce                     715 ECB             :             else
 8349 tgl                       716 CBC      170703 :                 result = 1;     /* NULL ">" NOT_NULL */
                                717                 :         }
 8297                           718       207693827 :         else if (isNull)        /* key is NOT_NULL and item is NULL */
                                719                 :         {
 5934                           720             132 :             if (scankey->sk_flags & SK_BT_NULLS_FIRST)
 5934 tgl                       721 UIC           0 :                 result = 1;     /* NOT_NULL ">" NULL */
 5934 tgl                       722 ECB             :             else
 5934 tgl                       723 GBC         132 :                 result = -1;    /* NOT_NULL "<" NULL */
                                724                 :         }
 9345 bruce                     725 ECB             :         else
                                726                 :         {
                                727                 :             /*
                                728                 :              * The sk_func needs to be passed the index value as left arg and
                                729                 :              * the sk_argument as right arg (they might be of different
                                730                 :              * types).  Since it is convenient for callers to think of
                                731                 :              * _bt_compare as comparing the scankey to the index item, we have
                                732                 :              * to flip the sign of the comparison result.  (Unless it's a DESC
                                733                 :              * column, in which case we *don't* flip the sign.)
                                734                 :              */
 4380 tgl                       735 GIC   207693695 :             result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
                                736                 :                                                      scankey->sk_collation,
 4380 tgl                       737 ECB             :                                                      datum,
                                738                 :                                                      scankey->sk_argument));
                                739                 : 
 5934 tgl                       740 GIC   207693695 :             if (!(scankey->sk_flags & SK_BT_DESC))
 1647                           741       207693692 :                 INVERT_COMPARE_RESULT(result);
 8297 tgl                       742 ECB             :         }
 9345 bruce                     743                 : 
                                744                 :         /* if the keys are unequal, return the difference */
 9345 bruce                     745 GIC   207864745 :         if (result != 0)
 8986                           746       162695429 :             return result;
 7088 tgl                       747 ECB             : 
 7088 tgl                       748 CBC    45169316 :         scankey++;
                                749                 :     }
 9345 bruce                     750 ECB             : 
                                751                 :     /*
                                752                 :      * All non-truncated attributes (other than heap TID) were found to be
                                753                 :      * equal.  Treat truncated attributes as minus infinity when scankey has a
                                754                 :      * key attribute value that would otherwise be compared directly.
                                755                 :      *
                                756                 :      * Note: it doesn't matter if ntupatts includes non-key attributes;
                                757                 :      * scankey won't, so explicitly excluding non-key attributes isn't
                                758                 :      * necessary.
                                759                 :      */
 1481 pg                        760 GIC    12855499 :     if (key->keysz > ntupatts)
                                761          125172 :         return 1;
 1481 pg                        762 ECB             : 
                                763                 :     /*
                                764                 :      * Use the heap TID attribute and scantid to try to break the tie.  The
                                765                 :      * rules are the same as any other key attribute -- only the
                                766                 :      * representation differs.
                                767                 :      */
 1481 pg                        768 GIC    12730327 :     heapTid = BTreeTupleGetHeapTID(itup);
                                769        12730327 :     if (key->scantid == NULL)
 1481 pg                        770 ECB             :     {
                                771                 :         /*
                                772                 :          * Most searches have a scankey that is considered greater than a
                                773                 :          * truncated pivot tuple if and when the scankey has equal values for
                                774                 :          * attributes up to and including the least significant untruncated
                                775                 :          * attribute in tuple.
                                776                 :          *
                                777                 :          * For example, if an index has the minimum two attributes (single
                                778                 :          * user key attribute, plus heap TID attribute), and a page's high key
                                779                 :          * is ('foo', -inf), and scankey is ('foo', <omitted>), the search
                                780                 :          * will not descend to the page to the left.  The search will descend
                                781                 :          * right instead.  The truncated attribute in pivot tuple means that
                                782                 :          * all non-pivot tuples on the page to the left are strictly < 'foo',
                                783                 :          * so it isn't necessary to descend left.  In other words, search
                                784                 :          * doesn't have to descend left because it isn't interested in a match
                                785                 :          * that has a heap TID value of -inf.
                                786                 :          *
                                787                 :          * However, some searches (pivotsearch searches) actually require that
                                788                 :          * we descend left when this happens.  -inf is treated as a possible
                                789                 :          * match for omitted scankey attribute(s).  This is needed by page
                                790                 :          * deletion, which must re-find leaf pages that are targets for
                                791                 :          * deletion using their high keys.
                                792                 :          *
                                793                 :          * Note: the heap TID part of the test ensures that scankey is being
                                794                 :          * compared to a pivot tuple with one or more truncated key
                                795                 :          * attributes.
                                796                 :          *
                                797                 :          * Note: pg_upgrade'd !heapkeyspace indexes must always descend to the
                                798                 :          * left here, since they have no heap TID attribute (and cannot have
                                799                 :          * any -inf key values in any case, since truncation can only remove
                                800                 :          * non-key attributes).  !heapkeyspace searches must always be
                                801                 :          * prepared to deal with matches on both sides of the pivot once the
                                802                 :          * leaf level is reached.
                                803                 :          */
 1481 pg                        804 GIC     9213987 :         if (key->heapkeyspace && !key->pivotsearch &&
                                805         9207253 :             key->keysz == ntupatts && heapTid == NULL)
 1481 pg                        806 CBC        7404 :             return 1;
 1481 pg                        807 ECB             : 
                                808                 :         /* All provided scankey arguments found to be equal */
 1481 pg                        809 GIC     9206583 :         return 0;
                                810                 :     }
 1481 pg                        811 ECB             : 
                                812                 :     /*
                                813                 :      * Treat truncated heap TID as minus infinity, since scankey has a key
                                814                 :      * attribute value (scantid) that would otherwise be compared directly
                                815                 :      */
 1481 pg                        816 GIC     3516340 :     Assert(key->keysz == IndexRelationGetNumberOfKeyAttributes(rel));
                                817         3516340 :     if (heapTid == NULL)
 1481 pg                        818 CBC        1978 :         return 1;
 1481 pg                        819 ECB             : 
 1138                           820                 :     /*
                                821                 :      * Scankey must be treated as equal to a posting list tuple if its scantid
                                822                 :      * value falls within the range of the posting list.  In all other cases
                                823                 :      * there can only be a single heap TID value, which is compared directly
                                824                 :      * with scantid.
                                825                 :      */
 1481 pg                        826 GIC     3514362 :     Assert(ntupatts >= IndexRelationGetNumberOfKeyAttributes(rel));
 1138                           827         3514362 :     result = ItemPointerCompare(key->scantid, heapTid);
 1138 pg                        828 CBC     3513995 :     if (result <= 0 || !BTreeTupleIsPosting(itup))
                                829         3394046 :         return result;
 1138 pg                        830 ECB             :     else
                                831                 :     {
 1138 pg                        832 GIC      119949 :         result = ItemPointerCompare(key->scantid,
                                833                 :                                     BTreeTupleGetMaxHeapTID(itup));
 1138 pg                        834 CBC      119949 :         if (result > 0)
 1138 pg                        835 GIC      112183 :             return 1;
 1138 pg                        836 ECB             :     }
                                837                 : 
 1138 pg                        838 GIC        7766 :     return 0;
                                839                 : }
 9770 scrappy                   840 ECB             : 
                                841                 : /*
                                842                 :  *  _bt_first() -- Find the first item in a scan.
                                843                 :  *
                                844                 :  *      We need to be clever about the direction of scan, the search
                                845                 :  *      conditions, and the tree ordering.  We find the first item (or,
                                846                 :  *      if backwards scan, the last item) in the tree that satisfies the
                                847                 :  *      qualifications in the scan key.  On success exit, the page containing
                                848                 :  *      the current index tuple is pinned but not locked, and data about
                                849                 :  *      the matching tuple(s) on the page has been loaded into so->currPos.
                                850                 :  *      scan->xs_ctup.t_self is set to the heap TID of the current tuple,
                                851                 :  *      and if requested, scan->xs_itup points to a copy of the index tuple.
                                852                 :  *
                                853                 :  * If there are no matching items in the index, we return false, with no
                                854                 :  * pins or locks held.
                                855                 :  *
                                856                 :  * Note that scan->keyData[], and the so->keyData[] scankey built from it,
                                857                 :  * are both search-type scankeys (see nbtree/README for more about this).
                                858                 :  * Within this routine, we build a temporary insertion-type scankey to use
                                859                 :  * in locating the scan start position.
                                860                 :  */
                                861                 : bool
 9770 scrappy                   862 GIC     9087979 : _bt_first(IndexScanDesc scan, ScanDirection dir)
                                863                 : {
 7625 tgl                       864 CBC     9087979 :     Relation    rel = scan->indexRelation;
    8 andres                    865 GNC     9087979 :     Relation    heaprel = scan->heapRelation;
 7625 tgl                       866 GIC     9087979 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 9344 bruce                     867 ECB             :     Buffer      buf;
                                868                 :     BTStack     stack;
 8297 tgl                       869                 :     OffsetNumber offnum;
                                870                 :     StrategyNumber strat;
                                871                 :     bool        nextkey;
                                872                 :     bool        goback;
                                873                 :     BTScanInsertData inskey;
                                874                 :     ScanKey     startKeys[INDEX_MAX_KEYS];
                                875                 :     ScanKeyData notnullkeys[INDEX_MAX_KEYS];
 8397 bruce                     876 GIC     9087979 :     int         keysCount = 0;
                                877                 :     int         i;
                                878                 :     bool        status;
 8397 bruce                     879 ECB             :     StrategyNumber strat_total;
                                880                 :     BTScanPosItem *currItem;
                                881                 :     BlockNumber blkno;
                                882                 : 
 2937 kgrittn                   883 GIC     9087979 :     Assert(!BTScanPosIsValid(so->currPos));
                                884                 : 
 5796 tgl                       885         9087979 :     pgstat_count_index_scan(rel);
 6394 tgl                       886 ECB             : 
                                887                 :     /*
 6285                           888                 :      * Examine the scan keys and eliminate any redundant keys; also mark the
                                889                 :      * keys that must be matched to continue the scan.
                                890                 :      */
 7088 tgl                       891 GIC     9087979 :     _bt_preprocess_keys(scan);
                                892                 : 
                                893                 :     /*
 7088 tgl                       894 ECB             :      * Quit now if _bt_preprocess_keys() discovered that the scan keys can
                                895                 :      * never be satisfied (eg, x == 1 AND x > 2).
                                896                 :      */
 8053 bruce                     897 GIC     9087979 :     if (!so->qual_ok)
                                898                 :     {
                                899                 :         /* Notify any other workers that we're done with this scan key. */
  934 akapila                   900 CBC        1286 :         _bt_parallel_done(scan);
 7629 tgl                       901 GIC        1286 :         return false;
                                902                 :     }
 8293 tgl                       903 ECB             : 
 2244 rhaas                     904                 :     /*
                                905                 :      * For parallel scans, get the starting page from shared state. If the
                                906                 :      * scan has not started, proceed to find out first leaf page in the usual
                                907                 :      * way while keeping other participating processes waiting.  If the scan
                                908                 :      * has already begun, use the page number from the shared structure.
                                909                 :      */
 2244 rhaas                     910 GIC     9086693 :     if (scan->parallel_scan != NULL)
                                911                 :     {
                                912             184 :         status = _bt_parallel_seize(scan, &blkno);
 2244 rhaas                     913 CBC         184 :         if (!status)
 2244 rhaas                     914 GIC         140 :             return false;
 2244 rhaas                     915 CBC          44 :         else if (blkno == P_NONE)
 2244 rhaas                     916 ECB             :         {
 2244 rhaas                     917 CBC           2 :             _bt_parallel_done(scan);
                                918               2 :             return false;
                                919                 :         }
                                920              42 :         else if (blkno != InvalidBlockNumber)
 2244 rhaas                     921 ECB             :         {
 2244 rhaas                     922 GIC           4 :             if (!_bt_parallel_readpage(scan, blkno, dir))
 2244 rhaas                     923 LBC           0 :                 return false;
 2244 rhaas                     924 GIC           4 :             goto readcomplete;
 2244 rhaas                     925 ECB             :         }
 2244 rhaas                     926 EUB             :     }
 2244 rhaas                     927 ECB             : 
                                928                 :     /*----------
                                929                 :      * Examine the scan keys to discover where we need to start the scan.
                                930                 :      *
                                931                 :      * We want to identify the keys that can be used as starting boundaries;
                                932                 :      * these are =, >, or >= keys for a forward scan or =, <, <= keys for
                                933                 :      * a backwards scan.  We can use keys for multiple attributes so long as
                                934                 :      * the prior attributes had only =, >= (resp. =, <=) keys.  Once we accept
                                935                 :      * a > or < boundary or find an attribute with no boundary (which can be
                                936                 :      * thought of as the same as "> -infinity"), we can't use keys for any
                                937                 :      * attributes to its right, because it would break our simplistic notion
                                938                 :      * of what initial positioning strategy to use.
                                939                 :      *
                                940                 :      * When the scan keys include cross-type operators, _bt_preprocess_keys
                                941                 :      * may not be able to eliminate redundant keys; in such cases we will
                                942                 :      * arbitrarily pick a usable one for each attribute.  This is correct
                                943                 :      * but possibly not optimal behavior.  (For example, with keys like
                                944                 :      * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
                                945                 :      * x=5 would be more efficient.)  Since the situation only arises given
                                946                 :      * a poorly-worded query plus an incomplete opfamily, live with it.
                                947                 :      *
                                948                 :      * When both equality and inequality keys appear for a single attribute
                                949                 :      * (again, only possible when cross-type operators appear), we *must*
                                950                 :      * select one of the equality keys for the starting point, because
                                951                 :      * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
                                952                 :      * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
                                953                 :      * start at x=4, we will fail and stop before reaching x=10.  If multiple
                                954                 :      * equality quals survive preprocessing, however, it doesn't matter which
                                955                 :      * one we use --- by definition, they are either redundant or
                                956                 :      * contradictory.
                                957                 :      *
                                958                 :      * Any regular (not SK_SEARCHNULL) key implies a NOT NULL qualifier.
                                959                 :      * If the index stores nulls at the end of the index we'll be starting
                                960                 :      * from, and we have no boundary key for the column (which means the key
                                961                 :      * we deduced NOT NULL from is an inequality key that constrains the other
                                962                 :      * end of the index), then we cons up an explicit SK_SEARCHNOTNULL key to
                                963                 :      * use as a boundary key.  If we didn't do this, we might find ourselves
                                964                 :      * traversing a lot of null entries at the start of the scan.
                                965                 :      *
                                966                 :      * In this loop, row-comparison keys are treated the same as keys on their
                                967                 :      * first (leftmost) columns.  We'll add on lower-order columns of the row
                                968                 :      * comparison below, if possible.
                                969                 :      *
                                970                 :      * The selected scan keys (at most one per index column) are remembered by
                                971                 :      * storing their addresses into the local startKeys[] array.
                                972                 :      *----------
                                973                 :      */
 8595 bruce                     974 GIC     9086547 :     strat_total = BTEqualStrategyNumber;
 8293 tgl                       975         9086547 :     if (so->numberOfKeys > 0)
                                976                 :     {
 7088 tgl                       977 ECB             :         AttrNumber  curattr;
                                978                 :         ScanKey     chosen;
                                979                 :         ScanKey     impliesNN;
                                980                 :         ScanKey     cur;
                                981                 : 
                                982                 :         /*
                                983                 :          * chosen is the so-far-chosen key for the current attribute, if any.
                                984                 :          * We don't cast the decision in stone until we reach keys for the
                                985                 :          * next attribute.
                                986                 :          */
 7088 tgl                       987 GIC     9081689 :         curattr = 1;
                                988         9081689 :         chosen = NULL;
                                989                 :         /* Also remember any scankey that implies a NOT NULL constraint */
 4176 tgl                       990 CBC     9081689 :         impliesNN = NULL;
 6797 bruce                     991 ECB             : 
                                992                 :         /*
 7088 tgl                       993                 :          * Loop iterates from 0 to numberOfKeys inclusive; we use the last
                                994                 :          * pass to handle after-last-key processing.  Actual exit from the
                                995                 :          * loop is at one of the "break" statements below.
                                996                 :          */
 7088 tgl                       997 GIC    24989949 :         for (cur = so->keyData, i = 0;; cur++, i++)
                                998                 :         {
                                999        24989949 :             if (i >= so->numberOfKeys || cur->sk_attno != curattr)
 8595 bruce                    1000 ECB             :             {
                               1001                 :                 /*
 7088 tgl                      1002                 :                  * Done looking at keys for curattr.  If we didn't find a
                               1003                 :                  * usable boundary key, see if we can deduce a NOT NULL key.
                               1004                 :                  */
 4176 tgl                      1005 GIC    15934530 :                 if (chosen == NULL && impliesNN != NULL &&
                               1006           25904 :                     ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
                               1007                 :                      ScanDirectionIsForward(dir) :
 4176 tgl                      1008 ECB             :                      ScanDirectionIsBackward(dir)))
                               1009                 :                 {
                               1010                 :                     /* Yes, so build the key in notnullkeys[keysCount] */
 4176 tgl                      1011 GIC           3 :                     chosen = &notnullkeys[keysCount];
                               1012               3 :                     ScanKeyEntryInitialize(chosen,
                               1013                 :                                            (SK_SEARCHNOTNULL | SK_ISNULL |
 4176 tgl                      1014 CBC           3 :                                             (impliesNN->sk_flags &
 2118 tgl                      1015 ECB             :                                              (SK_BT_DESC | SK_BT_NULLS_FIRST))),
                               1016                 :                                            curattr,
 2118 tgl                      1017 CBC           3 :                                            ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
                               1018                 :                                             BTGreaterStrategyNumber :
                               1019                 :                                             BTLessStrategyNumber),
 4176 tgl                      1020 ECB             :                                            InvalidOid,
                               1021                 :                                            InvalidOid,
                               1022                 :                                            InvalidOid,
                               1023                 :                                            (Datum) 0);
                               1024                 :                 }
                               1025                 : 
                               1026                 :                 /*
                               1027                 :                  * If we still didn't find a usable boundary key, quit; else
                               1028                 :                  * save the boundary key pointer in startKeys.
                               1029                 :                  */
 7088 tgl                      1030 GIC    15908626 :                 if (chosen == NULL)
                               1031           27175 :                     break;
                               1032        15881451 :                 startKeys[keysCount++] = chosen;
 6797 bruce                    1033 ECB             : 
 7088 tgl                      1034                 :                 /*
 6797 bruce                    1035                 :                  * Adjust strat_total, and quit if we have stored a > or <
                               1036                 :                  * key.
                               1037                 :                  */
 7088 tgl                      1038 GIC    15881451 :                 strat = chosen->sk_strategy;
                               1039        15881451 :                 if (strat != BTEqualStrategyNumber)
                               1040                 :                 {
 7088 tgl                      1041 CBC      709058 :                     strat_total = strat;
                               1042          709058 :                     if (strat == BTGreaterStrategyNumber ||
                               1043                 :                         strat == BTLessStrategyNumber)
 7088 tgl                      1044 ECB             :                         break;
                               1045                 :                 }
                               1046                 : 
                               1047                 :                 /*
                               1048                 :                  * Done if that was the last attribute, or if next key is not
                               1049                 :                  * in sequence (implying no boundary key is available for the
                               1050                 :                  * next attribute).
                               1051                 :                  */
 6509 tgl                      1052 GIC    15174487 :                 if (i >= so->numberOfKeys ||
                               1053         6827667 :                     cur->sk_attno != curattr + 1)
                               1054                 :                     break;
 6797 bruce                    1055 ECB             : 
 7088 tgl                      1056                 :                 /*
                               1057                 :                  * Reset for next attr.
                               1058                 :                  */
 7088 tgl                      1059 GIC     6826937 :                 curattr = cur->sk_attno;
                               1060         6826937 :                 chosen = NULL;
 4176                          1061         6826937 :                 impliesNN = NULL;
 8595 bruce                    1062 ECB             :             }
 7088 tgl                      1063                 : 
 4176                          1064                 :             /*
                               1065                 :              * Can we use this key as a starting boundary for this attr?
                               1066                 :              *
                               1067                 :              * If not, does it imply a NOT NULL constraint?  (Because
                               1068                 :              * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
                               1069                 :              * *any* inequality key works for that; we need not test.)
                               1070                 :              */
 7088 tgl                      1071 GIC    15908260 :             switch (cur->sk_strategy)
                               1072                 :             {
                               1073           48301 :                 case BTLessStrategyNumber:
 7088 tgl                      1074 ECB             :                 case BTLessEqualStrategyNumber:
 4176 tgl                      1075 GIC       48301 :                     if (chosen == NULL)
 4176 tgl                      1076 ECB             :                     {
 4176 tgl                      1077 GIC       47399 :                         if (ScanDirectionIsBackward(dir))
 4176 tgl                      1078 CBC       21504 :                             chosen = cur;
                               1079                 :                         else
                               1080           25895 :                             impliesNN = cur;
 4176 tgl                      1081 ECB             :                     }
 7088 tgl                      1082 GIC       48301 :                     break;
 7088 tgl                      1083 CBC    15172393 :                 case BTEqualStrategyNumber:
                               1084                 :                     /* override any non-equality choice */
                               1085        15172393 :                     chosen = cur;
                               1086        15172393 :                     break;
 7088 tgl                      1087 GIC      687566 :                 case BTGreaterEqualStrategyNumber:
 7088 tgl                      1088 ECB             :                 case BTGreaterStrategyNumber:
 4176 tgl                      1089 CBC      687566 :                     if (chosen == NULL)
 4176 tgl                      1090 ECB             :                     {
 4176 tgl                      1091 GIC      687566 :                         if (ScanDirectionIsForward(dir))
 4176 tgl                      1092 CBC      687551 :                             chosen = cur;
                               1093                 :                         else
                               1094              15 :                             impliesNN = cur;
 4176 tgl                      1095 ECB             :                     }
 8762 bruce                    1096 GIC      687566 :                     break;
 8762 bruce                    1097 ECB             :             }
                               1098                 :         }
 9345                          1099                 :     }
                               1100                 : 
                               1101                 :     /*
                               1102                 :      * If we found no usable boundary keys, we have to start from one end of
                               1103                 :      * the tree.  Walk down that edge to the first or last key, and scan from
                               1104                 :      * there.
                               1105                 :      */
 7088 tgl                      1106 GIC     9086547 :     if (keysCount == 0)
                               1107                 :     {
                               1108                 :         bool        match;
 2244 rhaas                    1109 ECB             : 
 2244 rhaas                    1110 GIC       31957 :         match = _bt_endpoint(scan, dir);
                               1111                 : 
                               1112           31957 :         if (!match)
 2244 rhaas                    1113 ECB             :         {
                               1114                 :             /* No match, so mark (parallel) scan finished */
 2244 rhaas                    1115 CBC        3978 :             _bt_parallel_done(scan);
                               1116                 :         }
                               1117                 : 
                               1118           31957 :         return match;
                               1119                 :     }
                               1120                 : 
 9345 bruce                    1121 ECB             :     /*
                               1122                 :      * We want to start the scan somewhere within the index.  Set up an
                               1123                 :      * insertion scankey we can use to search for the boundary point we
                               1124                 :      * identified above.  The insertion scankey is built using the keys
                               1125                 :      * identified by startKeys[].  (Remaining insertion scankey fields are
                               1126                 :      * initialized after initial-positioning strategy is finalized.)
                               1127                 :      */
 6503 tgl                      1128 GIC     9054590 :     Assert(keysCount <= INDEX_MAX_KEYS);
 8397 bruce                    1129        24936029 :     for (i = 0; i < keysCount; i++)
                               1130                 :     {
 7088 tgl                      1131 CBC    15881451 :         ScanKey     cur = startKeys[i];
 8053 bruce                    1132 ECB             : 
 6031 bruce                    1133 GIC    15881451 :         Assert(cur->sk_attno == i + 1);
 6797 bruce                    1134 ECB             : 
 6283 tgl                      1135 GIC    15881451 :         if (cur->sk_flags & SK_ROW_HEADER)
 7088 tgl                      1136 ECB             :         {
                               1137                 :             /*
 6283                          1138                 :              * Row comparison header: look to the first row member instead.
                               1139                 :              *
                               1140                 :              * The member scankeys are already in insertion format (ie, they
                               1141                 :              * have sk_func = 3-way-comparison function), but we have to watch
                               1142                 :              * out for nulls, which _bt_preprocess_keys didn't check. A null
                               1143                 :              * in the first row member makes the condition unmatchable, just
                               1144                 :              * like qual_ok = false.
                               1145                 :              */
 5934 tgl                      1146 GIC          12 :             ScanKey     subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
                               1147                 : 
                               1148              12 :             Assert(subkey->sk_flags & SK_ROW_MEMBER);
 5934 tgl                      1149 CBC          12 :             if (subkey->sk_flags & SK_ISNULL)
                               1150                 :             {
 2244 rhaas                    1151 LBC           0 :                 _bt_parallel_done(scan);
 6283 tgl                      1152               0 :                 return false;
                               1153                 :             }
 1481 pg                       1154 GBC          12 :             memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData));
 6031 bruce                    1155 EUB             : 
                               1156                 :             /*
 6283 tgl                      1157 ECB             :              * If the row comparison is the last positioning key we accepted,
                               1158                 :              * try to add additional keys from the lower-order row members.
                               1159                 :              * (If we accepted independent conditions on additional index
                               1160                 :              * columns, we use those instead --- doesn't seem worth trying to
                               1161                 :              * determine which is more restrictive.)  Note that this is OK
                               1162                 :              * even if the row comparison is of ">" or "<" type, because the
                               1163                 :              * condition applied to all but the last row member is effectively
                               1164                 :              * ">=" or "<=", and so the extra keys don't break the positioning
                               1165                 :              * scheme.  But, by the same token, if we aren't able to use all
                               1166                 :              * the row members, then the part of the row comparison that we
                               1167                 :              * did use has to be treated as just a ">=" or "<=" condition, and
                               1168                 :              * so we'd better adjust strat_total accordingly.
                               1169                 :              */
 6283 tgl                      1170 GIC          12 :             if (i == keysCount - 1)
                               1171                 :             {
 5934                          1172              12 :                 bool        used_all_subkeys = false;
 5934 tgl                      1173 ECB             : 
 5934 tgl                      1174 GIC          12 :                 Assert(!(subkey->sk_flags & SK_ROW_END));
 5624 bruce                    1175 ECB             :                 for (;;)
                               1176                 :                 {
 5934 tgl                      1177 CBC          12 :                     subkey++;
 5934 tgl                      1178 GIC          12 :                     Assert(subkey->sk_flags & SK_ROW_MEMBER);
                               1179              12 :                     if (subkey->sk_attno != keysCount + 1)
 6283 tgl                      1180 LBC           0 :                         break;  /* out-of-sequence, can't use it */
 5934 tgl                      1181 CBC          12 :                     if (subkey->sk_strategy != cur->sk_strategy)
 5934 tgl                      1182 LBC           0 :                         break;  /* wrong direction, can't use it */
 5934 tgl                      1183 GBC          12 :                     if (subkey->sk_flags & SK_ISNULL)
 6283 tgl                      1184 LBC           0 :                         break;  /* can't use null keys */
 6283 tgl                      1185 GBC          12 :                     Assert(keysCount < INDEX_MAX_KEYS);
 1481 pg                       1186 CBC          12 :                     memcpy(inskey.scankeys + keysCount, subkey,
 1481 pg                       1187 EUB             :                            sizeof(ScanKeyData));
 6283 tgl                      1188 CBC          12 :                     keysCount++;
 5934                          1189              12 :                     if (subkey->sk_flags & SK_ROW_END)
                               1190                 :                     {
                               1191              12 :                         used_all_subkeys = true;
                               1192              12 :                         break;
                               1193                 :                     }
 5934 tgl                      1194 ECB             :                 }
 5934 tgl                      1195 CBC          12 :                 if (!used_all_subkeys)
                               1196                 :                 {
 5934 tgl                      1197 UIC           0 :                     switch (strat_total)
 5934 tgl                      1198 ECB             :                     {
 5934 tgl                      1199 UIC           0 :                         case BTLessStrategyNumber:
 5934 tgl                      1200 UBC           0 :                             strat_total = BTLessEqualStrategyNumber;
 5934 tgl                      1201 UIC           0 :                             break;
 5934 tgl                      1202 UBC           0 :                         case BTGreaterStrategyNumber:
                               1203               0 :                             strat_total = BTGreaterEqualStrategyNumber;
                               1204               0 :                             break;
 5934 tgl                      1205 EUB             :                     }
 6283                          1206                 :                 }
 6283 tgl                      1207 GBC          12 :                 break;          /* done with outer loop */
                               1208                 :             }
                               1209                 :         }
 7088 tgl                      1210 ECB             :         else
                               1211                 :         {
                               1212                 :             /*
                               1213                 :              * Ordinary comparison key.  Transform the search-style scan key
                               1214                 :              * to an insertion scan key by replacing the sk_func with the
                               1215                 :              * appropriate btree comparison function.
                               1216                 :              *
                               1217                 :              * If scankey operator is not a cross-type comparison, we can use
                               1218                 :              * the cached comparison function; otherwise gotta look it up in
                               1219                 :              * the catalogs.  (That can't lead to infinite recursion, since no
                               1220                 :              * indexscan initiated by syscache lookup will use cross-data-type
                               1221                 :              * operators.)
                               1222                 :              *
                               1223                 :              * We support the convention that sk_subtype == InvalidOid means
                               1224                 :              * the opclass input type; this is a hack to simplify life for
                               1225                 :              * ScanKeyInit().
                               1226                 :              */
 5951 tgl                      1227 GIC    15881439 :             if (cur->sk_subtype == rel->rd_opcintype[i] ||
                               1228        15621462 :                 cur->sk_subtype == InvalidOid)
 6283                          1229        15875108 :             {
 6283 tgl                      1230 ECB             :                 FmgrInfo   *procinfo;
                               1231                 : 
 6283 tgl                      1232 CBC    15875108 :                 procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
 1481 pg                       1233 GIC    15875108 :                 ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
                               1234                 :                                                cur->sk_flags,
 6283 tgl                      1235 CBC    15875108 :                                                cur->sk_attno,
 6283 tgl                      1236 ECB             :                                                InvalidStrategy,
                               1237                 :                                                cur->sk_subtype,
 4380                          1238                 :                                                cur->sk_collation,
                               1239                 :                                                procinfo,
                               1240                 :                                                cur->sk_argument);
                               1241                 :             }
                               1242                 :             else
                               1243                 :             {
                               1244                 :                 RegProcedure cmp_proc;
                               1245                 : 
 5951 tgl                      1246 GIC        6331 :                 cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
                               1247            6331 :                                              rel->rd_opcintype[i],
                               1248                 :                                              cur->sk_subtype,
 5951 tgl                      1249 ECB             :                                              BTORDER_PROC);
 5951 tgl                      1250 CBC        6331 :                 if (!RegProcedureIsValid(cmp_proc))
 5951 tgl                      1251 UIC           0 :                     elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
                               1252                 :                          BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
 5951 tgl                      1253 ECB             :                          cur->sk_attno, RelationGetRelationName(rel));
 1481 pg                       1254 GBC        6331 :                 ScanKeyEntryInitialize(inskey.scankeys + i,
                               1255                 :                                        cur->sk_flags,
 6283 tgl                      1256 GIC        6331 :                                        cur->sk_attno,
 6283 tgl                      1257 ECB             :                                        InvalidStrategy,
                               1258                 :                                        cur->sk_subtype,
 4380                          1259                 :                                        cur->sk_collation,
                               1260                 :                                        cmp_proc,
                               1261                 :                                        cur->sk_argument);
                               1262                 :             }
                               1263                 :         }
                               1264                 :     }
                               1265                 : 
                               1266                 :     /*----------
                               1267                 :      * Examine the selected initial-positioning strategy to determine exactly
                               1268                 :      * where we need to start the scan, and set flag variables to control the
                               1269                 :      * code below.
                               1270                 :      *
                               1271                 :      * If nextkey = false, _bt_search and _bt_binsrch will locate the first
                               1272                 :      * item >= scan key.  If nextkey = true, they will locate the first
                               1273                 :      * item > scan key.
                               1274                 :      *
                               1275                 :      * If goback = true, we will then step back one item, while if
                               1276                 :      * goback = false, we will start the scan on the located item.
                               1277                 :      *----------
                               1278                 :      */
 7049 tgl                      1279 GIC     9054590 :     switch (strat_total)
                               1280                 :     {
                               1281           21507 :         case BTLessStrategyNumber:
 6797 bruce                    1282 ECB             : 
                               1283                 :             /*
 6385                          1284                 :              * Find first item >= scankey, then back up one to arrive at last
                               1285                 :              * item < scankey.  (Note: this positioning strategy is only used
                               1286                 :              * for a backward scan, so that is always the correct starting
                               1287                 :              * position.)
                               1288                 :              */
 7049 tgl                      1289 GIC       21507 :             nextkey = false;
                               1290           21507 :             goback = true;
                               1291           21507 :             break;
 7049 tgl                      1292 ECB             : 
 7049 tgl                      1293 LBC           0 :         case BTLessEqualStrategyNumber:
 6797 bruce                    1294 ECB             : 
                               1295                 :             /*
 6385 bruce                    1296 EUB             :              * Find first item > scankey, then back up one to arrive at last
                               1297                 :              * item <= scankey.  (Note: this positioning strategy is only used
                               1298                 :              * for a backward scan, so that is always the correct starting
                               1299                 :              * position.)
                               1300                 :              */
 7049 tgl                      1301 UIC           0 :             nextkey = true;
                               1302               0 :             goback = true;
                               1303               0 :             break;
 7049 tgl                      1304 EUB             : 
 7049 tgl                      1305 GBC     8345532 :         case BTEqualStrategyNumber:
 6797 bruce                    1306 EUB             : 
                               1307                 :             /*
 6385 bruce                    1308 ECB             :              * If a backward scan was specified, need to start with last equal
                               1309                 :              * item not first one.
                               1310                 :              */
 7049 tgl                      1311 GIC     8345532 :             if (ScanDirectionIsBackward(dir))
                               1312                 :             {
                               1313                 :                 /*
 6385 bruce                    1314 ECB             :                  * This is the same as the <= strategy.  We will check at the
                               1315                 :                  * end whether the found item is actually =.
                               1316                 :                  */
 7049 tgl                      1317 GIC          64 :                 nextkey = true;
                               1318              64 :                 goback = true;
                               1319                 :             }
 7049 tgl                      1320 ECB             :             else
                               1321                 :             {
                               1322                 :                 /*
                               1323                 :                  * This is the same as the >= strategy.  We will check at the
                               1324                 :                  * end whether the found item is actually =.
                               1325                 :                  */
 7049 tgl                      1326 GIC     8345468 :                 nextkey = false;
                               1327         8345468 :                 goback = false;
                               1328                 :             }
 7049 tgl                      1329 CBC     8345532 :             break;
 7049 tgl                      1330 ECB             : 
 7049 tgl                      1331 GIC        2094 :         case BTGreaterEqualStrategyNumber:
 6797 bruce                    1332 ECB             : 
                               1333                 :             /*
 3260                          1334                 :              * Find first item >= scankey.  (This is only used for forward
                               1335                 :              * scans.)
                               1336                 :              */
 7049 tgl                      1337 GIC        2094 :             nextkey = false;
                               1338            2094 :             goback = false;
                               1339            2094 :             break;
 7049 tgl                      1340 ECB             : 
 7049 tgl                      1341 CBC      685457 :         case BTGreaterStrategyNumber:
 6797 bruce                    1342 ECB             : 
                               1343                 :             /*
                               1344                 :              * Find first item > scankey.  (This is only used for forward
                               1345                 :              * scans.)
                               1346                 :              */
 7049 tgl                      1347 GIC      685457 :             nextkey = true;
                               1348          685457 :             goback = false;
                               1349          685457 :             break;
 7049 tgl                      1350 ECB             : 
 7049 tgl                      1351 LBC           0 :         default:
 7049 tgl                      1352 ECB             :             /* can't get here, but keep compiler quiet */
 7049 tgl                      1353 UIC           0 :             elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
 7049 tgl                      1354 EUB             :             return false;
                               1355                 :     }
                               1356                 : 
                               1357                 :     /* Initialize remaining insertion scan key fields */
    8 andres                   1358 GNC     9054590 :     _bt_metaversion(rel, heaprel, &inskey.heapkeyspace, &inskey.allequalimage);
 1418 tgl                      1359 GIC     9054590 :     inskey.anynullkeys = false; /* unused */
 1481 pg                       1360         9054590 :     inskey.nextkey = nextkey;
 1481 pg                       1361 CBC     9054590 :     inskey.pivotsearch = false;
                               1362         9054590 :     inskey.scantid = NULL;
                               1363         9054590 :     inskey.keysz = keysCount;
 1481 pg                       1364 ECB             : 
 9345 bruce                    1365                 :     /*
 6291 tgl                      1366                 :      * Use the manufactured insertion scan key to descend the tree and
                               1367                 :      * position ourselves on the target leaf page.
                               1368                 :      */
    8 andres                   1369 GNC     9054590 :     stack = _bt_search(rel, heaprel, &inskey, &buf, BT_READ, scan->xs_snapshot);
                               1370                 : 
                               1371                 :     /* don't need to keep the stack around... */
 8297 tgl                      1372 CBC     9054590 :     _bt_freestack(stack);
                               1373                 : 
 8053 bruce                    1374 GIC     9054590 :     if (!BufferIsValid(buf))
 9345 bruce                    1375 ECB             :     {
                               1376                 :         /*
 4315 heikki.linnakangas       1377                 :          * We only get here if the index is completely empty. Lock relation
                               1378                 :          * because nothing finer to lock exists.
                               1379                 :          */
 4316 heikki.linnakangas       1380 GIC      385759 :         PredicateLockRelation(rel, scan->xs_snapshot);
                               1381                 : 
                               1382                 :         /*
 2244 rhaas                    1383 ECB             :          * mark parallel scan as done, so that all the workers can finish
                               1384                 :          * their scan
                               1385                 :          */
 2244 rhaas                    1386 GIC      385759 :         _bt_parallel_done(scan);
                               1387          385759 :         BTScanPosInvalidate(so->currPos);
                               1388                 : 
 7629 tgl                      1389 CBC      385759 :         return false;
 9445 vadim4o                  1390 ECB             :     }
                               1391                 :     else
 4316 heikki.linnakangas       1392 CBC     8668831 :         PredicateLockPage(rel, BufferGetBlockNumber(buf),
                               1393                 :                           scan->xs_snapshot);
                               1394                 : 
 2244 rhaas                    1395         8668831 :     _bt_initialize_more_data(so, dir);
                               1396                 : 
                               1397                 :     /* position to the precise item on the page */
 1481 pg                       1398         8668831 :     offnum = _bt_binsrch(rel, &inskey, buf);
                               1399                 : 
                               1400                 :     /*
 6385 bruce                    1401 ECB             :      * If nextkey = false, we are positioned at the first item >= scan key, or
                               1402                 :      * possibly at the end of a page on which all the existing items are less
                               1403                 :      * than the scan key and we know that everything on later pages is greater
                               1404                 :      * than or equal to scan key.
                               1405                 :      *
                               1406                 :      * If nextkey = true, we are positioned at the first item > scan key, or
                               1407                 :      * possibly at the end of a page on which all the existing items are less
                               1408                 :      * than or equal to the scan key and we know that everything on later
                               1409                 :      * pages is greater than scan key.
                               1410                 :      *
                               1411                 :      * The actually desired starting point is either this item or the prior
                               1412                 :      * one, or in the end-of-page case it's the first item on the next page or
                               1413                 :      * the last item on this page.  Adjust the starting offset if needed. (If
                               1414                 :      * this results in an offset before the first item or after the last one,
                               1415                 :      * _bt_readpage will report no items found, and then we'll step to the
                               1416                 :      * next page as needed.)
                               1417                 :      */
 7049 tgl                      1418 GIC     8668831 :     if (goback)
 6181                          1419           21567 :         offnum = OffsetNumberPrev(offnum);
                               1420                 : 
 2937 kgrittn                  1421 ECB             :     /* remember which buffer we have pinned, if any */
 2937 kgrittn                  1422 CBC     8668831 :     Assert(!BTScanPosIsValid(so->currPos));
 2937 kgrittn                  1423 GIC     8668831 :     so->currPos.buf = buf;
                               1424                 : 
 6181 tgl                      1425 ECB             :     /*
                               1426                 :      * Now load data from the first page of the scan.
                               1427                 :      */
 6181 tgl                      1428 GIC     8668831 :     if (!_bt_readpage(scan, dir, offnum))
                               1429                 :     {
                               1430                 :         /*
 6181 tgl                      1431 ECB             :          * There's no actually-matching data on this page.  Try to advance to
                               1432                 :          * the next page.  Return false if there's no matching data at all.
                               1433                 :          */
  992 pg                       1434 GIC     3688427 :         _bt_unlockbuf(scan->indexRelation, so->currPos.buf);
 6181 tgl                      1435         3688427 :         if (!_bt_steppage(scan, dir))
 7049                          1436         3688414 :             return false;
 7049 tgl                      1437 ECB             :     }
 2937 kgrittn                  1438                 :     else
                               1439                 :     {
                               1440                 :         /* Drop the lock, and maybe the pin, on the current page */
 2937 kgrittn                  1441 GIC     4980404 :         _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
                               1442                 :     }
                               1443                 : 
 2244 rhaas                    1444 CBC     4980421 : readcomplete:
                               1445                 :     /* OK, itemIndex says what to return */
 4200 tgl                      1446 GIC     4980421 :     currItem = &so->currPos.items[so->currPos.itemIndex];
 1490 andres                   1447 CBC     4980421 :     scan->xs_heaptid = currItem->heapTid;
 4200 tgl                      1448 GIC     4980421 :     if (scan->xs_want_itup)
 4200 tgl                      1449 CBC       64578 :         scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 6181 tgl                      1450 ECB             : 
 6181 tgl                      1451 CBC     4980421 :     return true;
 6181 tgl                      1452 ECB             : }
                               1453                 : 
                               1454                 : /*
                               1455                 :  *  _bt_next() -- Get the next item in a scan.
                               1456                 :  *
                               1457                 :  *      On entry, so->currPos describes the current page, which may be pinned
                               1458                 :  *      but is not locked, and so->currPos.itemIndex identifies which item was
                               1459                 :  *      previously returned.
                               1460                 :  *
                               1461                 :  *      On successful exit, scan->xs_ctup.t_self is set to the TID of the
                               1462                 :  *      next heap tuple, and if requested, scan->xs_itup points to a copy of
                               1463                 :  *      the index tuple.  so->currPos is updated as needed.
                               1464                 :  *
                               1465                 :  *      On failure exit (no more tuples), we release pin and set
                               1466                 :  *      so->currPos.buf to InvalidBuffer.
                               1467                 :  */
                               1468                 : bool
 6181 tgl                      1469 GIC    11957219 : _bt_next(IndexScanDesc scan, ScanDirection dir)
                               1470                 : {
                               1471        11957219 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 4200 tgl                      1472 ECB             :     BTScanPosItem *currItem;
                               1473                 : 
 6181                          1474                 :     /*
                               1475                 :      * Advance to next tuple on current page; or if there's no more, try to
                               1476                 :      * step to the next page with data.
                               1477                 :      */
 6181 tgl                      1478 GIC    11957219 :     if (ScanDirectionIsForward(dir))
                               1479                 :     {
                               1480        11948015 :         if (++so->currPos.itemIndex > so->currPos.lastItem)
 6181 tgl                      1481 ECB             :         {
 6181 tgl                      1482 GIC     1161767 :             if (!_bt_steppage(scan, dir))
 6181 tgl                      1483 CBC     1147466 :                 return false;
                               1484                 :         }
 6181 tgl                      1485 ECB             :     }
 7049                          1486                 :     else
                               1487                 :     {
 6181 tgl                      1488 GIC        9204 :         if (--so->currPos.itemIndex < so->currPos.firstItem)
                               1489                 :         {
                               1490              37 :             if (!_bt_steppage(scan, dir))
 7629 tgl                      1491 CBC          31 :                 return false;
                               1492                 :         }
 9445 vadim4o                  1493 ECB             :     }
 9345 bruce                    1494                 : 
                               1495                 :     /* OK, itemIndex says what to return */
 4200 tgl                      1496 GIC    10809722 :     currItem = &so->currPos.items[so->currPos.itemIndex];
 1490 andres                   1497        10809722 :     scan->xs_heaptid = currItem->heapTid;
 4200 tgl                      1498        10809722 :     if (scan->xs_want_itup)
 4200 tgl                      1499 CBC     1788657 :         scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
 9345 bruce                    1500 ECB             : 
 6181 tgl                      1501 CBC    10809722 :     return true;
 6181 tgl                      1502 ECB             : }
                               1503                 : 
                               1504                 : /*
                               1505                 :  *  _bt_readpage() -- Load data from current index page into so->currPos
                               1506                 :  *
                               1507                 :  * Caller must have pinned and read-locked so->currPos.buf; the buffer's state
                               1508                 :  * is not changed here.  Also, currPos.moreLeft and moreRight must be valid;
                               1509                 :  * they are updated as appropriate.  All other fields of so->currPos are
                               1510                 :  * initialized from scratch here.
                               1511                 :  *
                               1512                 :  * We scan the current page starting at offnum and moving in the indicated
                               1513                 :  * direction.  All items matching the scan keys are loaded into currPos.items.
                               1514                 :  * moreLeft or moreRight (as appropriate) is cleared if _bt_checkkeys reports
                               1515                 :  * that there can be no more matching tuples in the current scan direction.
                               1516                 :  *
                               1517                 :  * In the case of a parallel scan, caller must have called _bt_parallel_seize
                               1518                 :  * prior to calling this function; this function will invoke
                               1519                 :  * _bt_parallel_release before returning.
                               1520                 :  *
                               1521                 :  * Returns true if any matching items found on the page, false if none.
                               1522                 :  */
                               1523                 : static bool
 6181 tgl                      1524 GIC     8712708 : _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
                               1525                 : {
                               1526         8712708 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 6181 tgl                      1527 ECB             :     Page        page;
                               1528                 :     BTPageOpaque opaque;
                               1529                 :     OffsetNumber minoff;
                               1530                 :     OffsetNumber maxoff;
                               1531                 :     int         itemIndex;
                               1532                 :     bool        continuescan;
                               1533                 :     int         indnatts;
                               1534                 : 
                               1535                 :     /*
                               1536                 :      * We must have the buffer pinned and locked, but the usual macro can't be
                               1537                 :      * used here; this function is what makes it good for currPos.
                               1538                 :      */
 6181 tgl                      1539 GIC     8712708 :     Assert(BufferIsValid(so->currPos.buf));
                               1540                 : 
 2545 kgrittn                  1541         8712708 :     page = BufferGetPage(so->currPos.buf);
  373 michael                  1542 CBC     8712708 :     opaque = BTPageGetOpaque(page);
                               1543                 : 
 2244 rhaas                    1544 ECB             :     /* allow next page be processed by parallel worker */
 2244 rhaas                    1545 CBC     8712708 :     if (scan->parallel_scan)
                               1546                 :     {
 2244 rhaas                    1547 GIC         644 :         if (ScanDirectionIsForward(dir))
 2244 rhaas                    1548 CBC         644 :             _bt_parallel_release(scan, opaque->btpo_next);
                               1549                 :         else
 2244 rhaas                    1550 LBC           0 :             _bt_parallel_release(scan, BufferGetBlockNumber(so->currPos.buf));
 2244 rhaas                    1551 ECB             :     }
                               1552                 : 
 1478 pg                       1553 GBC     8712708 :     continuescan = true;        /* default assumption */
 1478 pg                       1554 GIC     8712708 :     indnatts = IndexRelationGetNumberOfAttributes(scan->indexRelation);
 6181 tgl                      1555         8712708 :     minoff = P_FIRSTDATAKEY(opaque);
 6181 tgl                      1556 CBC     8712708 :     maxoff = PageGetMaxOffsetNumber(page);
 6181 tgl                      1557 ECB             : 
 2937 kgrittn                  1558                 :     /*
                               1559                 :      * We note the buffer's block number so that we can release the pin later.
                               1560                 :      * This allows us to re-read the buffer if it is needed again for hinting.
                               1561                 :      */
 2937 kgrittn                  1562 GIC     8712708 :     so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
                               1563                 : 
                               1564                 :     /*
 2937 kgrittn                  1565 ECB             :      * We save the LSN of the page as we read it, so that we know whether it
                               1566                 :      * safe to apply LP_DEAD hints to the page later.  This allows us to drop
                               1567                 :      * the pin for MVCC scans, which allows vacuum to avoid blocking.
                               1568                 :      */
 1916 alvherre                 1569 GIC     8712708 :     so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
                               1570                 : 
                               1571                 :     /*
 6181 tgl                      1572 ECB             :      * we must save the page's right-link while scanning it; this tells us
                               1573                 :      * where to step right to after we're done with these items.  There is no
                               1574                 :      * corresponding need for the left-link, since splits always go right.
                               1575                 :      */
 6181 tgl                      1576 GIC     8712708 :     so->currPos.nextPage = opaque->btpo_next;
                               1577                 : 
                               1578                 :     /* initialize tuple workspace to empty */
 4200 tgl                      1579 CBC     8712708 :     so->currPos.nextTupleOffset = 0;
                               1580                 : 
                               1581                 :     /*
 2937 kgrittn                  1582 ECB             :      * Now that the current page has been made consistent, the macro should be
                               1583                 :      * good.
                               1584                 :      */
 2937 kgrittn                  1585 GIC     8712708 :     Assert(BTScanPosIsPinned(so->currPos));
                               1586                 : 
 6181 tgl                      1587         8712708 :     if (ScanDirectionIsForward(dir))
 8762 bruce                    1588 ECB             :     {
                               1589                 :         /* load items[] in ascending order */
 6181 tgl                      1590 CBC     8691103 :         itemIndex = 0;
                               1591                 : 
 6181 tgl                      1592 GIC     8691103 :         offnum = Max(offnum, minoff);
 6181 tgl                      1593 ECB             : 
 6181 tgl                      1594 GIC    30578310 :         while (offnum <= maxoff)
 6181 tgl                      1595 ECB             :         {
 1478 pg                       1596 GIC    27999519 :             ItemId      iid = PageGetItemId(page, offnum);
 1478 pg                       1597 ECB             :             IndexTuple  itup;
                               1598                 : 
                               1599                 :             /*
                               1600                 :              * If the scan specifies not to return killed tuples, then we
                               1601                 :              * treat a killed tuple as not passing the qual
                               1602                 :              */
 1478 pg                       1603 GIC    27999519 :             if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
                               1604                 :             {
                               1605         1157894 :                 offnum = OffsetNumberNext(offnum);
 1478 pg                       1606 CBC     1157894 :                 continue;
                               1607                 :             }
 1478 pg                       1608 ECB             : 
 1478 pg                       1609 CBC    26841625 :             itup = (IndexTuple) PageGetItem(page, iid);
                               1610                 : 
 1478 pg                       1611 GIC    26841625 :             if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
 6181 tgl                      1612 ECB             :             {
                               1613                 :                 /* tuple passes all scan key conditions */
 1138 pg                       1614 CBC    20556895 :                 if (!BTreeTupleIsPosting(itup))
                               1615                 :                 {
                               1616                 :                     /* Remember it */
                               1617        20272487 :                     _bt_saveitem(so, itemIndex, offnum, itup);
 1138 pg                       1618 GIC    20272487 :                     itemIndex++;
                               1619                 :                 }
 1138 pg                       1620 ECB             :                 else
                               1621                 :                 {
                               1622                 :                     int         tupleOffset;
                               1623                 : 
                               1624                 :                     /*
                               1625                 :                      * Set up state to return posting list, and remember first
                               1626                 :                      * TID
                               1627                 :                      */
                               1628                 :                     tupleOffset =
 1138 pg                       1629 GIC      284408 :                         _bt_setuppostingitems(so, itemIndex, offnum,
                               1630                 :                                               BTreeTupleGetPostingN(itup, 0),
                               1631                 :                                               itup);
 1138 pg                       1632 CBC      284408 :                     itemIndex++;
                               1633                 :                     /* Remember additional TIDs */
 1138 pg                       1634 GIC     1444153 :                     for (int i = 1; i < BTreeTupleGetNPosting(itup); i++)
 1138 pg                       1635 ECB             :                     {
 1138 pg                       1636 GIC     1159745 :                         _bt_savepostingitem(so, itemIndex, offnum,
 1138 pg                       1637 ECB             :                                             BTreeTupleGetPostingN(itup, i),
                               1638                 :                                             tupleOffset);
 1138 pg                       1639 CBC     1159745 :                         itemIndex++;
                               1640                 :                     }
                               1641                 :                 }
 6181 tgl                      1642 ECB             :             }
                               1643                 :             /* When !continuescan, there can't be any more matches, so stop */
 6181 tgl                      1644 GIC    26841625 :             if (!continuescan)
                               1645         6112312 :                 break;
                               1646                 : 
 6181 tgl                      1647 CBC    20729313 :             offnum = OffsetNumberNext(offnum);
 6181 tgl                      1648 ECB             :         }
                               1649                 : 
 1478 pg                       1650                 :         /*
                               1651                 :          * We don't need to visit page to the right when the high key
                               1652                 :          * indicates that no more matches will be found there.
                               1653                 :          *
                               1654                 :          * Checking the high key like this works out more often than you might
                               1655                 :          * think.  Leaf page splits pick a split point between the two most
                               1656                 :          * dissimilar tuples (this is weighed against the need to evenly share
                               1657                 :          * free space).  Leaf pages with high key attribute values that can
                               1658                 :          * only appear on non-pivot tuples on the right sibling page are
                               1659                 :          * common.
                               1660                 :          */
 1478 pg                       1661 GIC     8691103 :         if (continuescan && !P_RIGHTMOST(opaque))
                               1662                 :         {
                               1663           62692 :             ItemId      iid = PageGetItemId(page, P_HIKEY);
 1478 pg                       1664 CBC       62692 :             IndexTuple  itup = (IndexTuple) PageGetItem(page, iid);
                               1665                 :             int         truncatt;
 1478 pg                       1666 ECB             : 
 1478 pg                       1667 CBC       62692 :             truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
 1478 pg                       1668 GIC       62692 :             _bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
                               1669                 :         }
 1478 pg                       1670 ECB             : 
 1478 pg                       1671 CBC     8691103 :         if (!continuescan)
 1478 pg                       1672 GIC     6147219 :             so->currPos.moreRight = false;
                               1673                 : 
 1138 pg                       1674 CBC     8691103 :         Assert(itemIndex <= MaxTIDsPerBTreePage);
 6181 tgl                      1675         8691103 :         so->currPos.firstItem = 0;
 6181 tgl                      1676 GIC     8691103 :         so->currPos.lastItem = itemIndex - 1;
 6181 tgl                      1677 CBC     8691103 :         so->currPos.itemIndex = 0;
 8762 bruce                    1678 ECB             :     }
 9345                          1679                 :     else
                               1680                 :     {
                               1681                 :         /* load items[] in descending order */
 1138 pg                       1682 GIC       21605 :         itemIndex = MaxTIDsPerBTreePage;
                               1683                 : 
 6181 tgl                      1684           21605 :         offnum = Min(offnum, maxoff);
 6181 tgl                      1685 ECB             : 
 6181 tgl                      1686 GIC     4336927 :         while (offnum >= minoff)
 6181 tgl                      1687 ECB             :         {
 1478 pg                       1688 GIC     4315364 :             ItemId      iid = PageGetItemId(page, offnum);
 1478 pg                       1689 ECB             :             IndexTuple  itup;
                               1690                 :             bool        tuple_alive;
                               1691                 :             bool        passes_quals;
                               1692                 : 
                               1693                 :             /*
                               1694                 :              * If the scan specifies not to return killed tuples, then we
                               1695                 :              * treat a killed tuple as not passing the qual.  Most of the
                               1696                 :              * time, it's a win to not bother examining the tuple's index
                               1697                 :              * keys, but just skip to the next tuple (previous, actually,
                               1698                 :              * since we're scanning backwards).  However, if this is the first
                               1699                 :              * tuple on the page, we do check the index keys, to prevent
                               1700                 :              * uselessly advancing to the page to the left.  This is similar
                               1701                 :              * to the high key optimization used by forward scans.
                               1702                 :              */
 1478 pg                       1703 GIC     4315364 :             if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
                               1704                 :             {
                               1705          490854 :                 Assert(offnum >= P_FIRSTDATAKEY(opaque));
 1478 pg                       1706 CBC      490854 :                 if (offnum > P_FIRSTDATAKEY(opaque))
                               1707                 :                 {
                               1708          490675 :                     offnum = OffsetNumberPrev(offnum);
                               1709          490675 :                     continue;
                               1710                 :                 }
 1478 pg                       1711 ECB             : 
 1478 pg                       1712 CBC         179 :                 tuple_alive = false;
                               1713                 :             }
                               1714                 :             else
                               1715         3824510 :                 tuple_alive = true;
                               1716                 : 
 1478 pg                       1717 GIC     3824689 :             itup = (IndexTuple) PageGetItem(page, iid);
 1478 pg                       1718 ECB             : 
 1478 pg                       1719 GIC     3824689 :             passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
 1478 pg                       1720 ECB             :                                          &continuescan);
 1478 pg                       1721 GIC     3824689 :             if (passes_quals && tuple_alive)
 6181 tgl                      1722 ECB             :             {
                               1723                 :                 /* tuple passes all scan key conditions */
 1138 pg                       1724 CBC     3824468 :                 if (!BTreeTupleIsPosting(itup))
                               1725                 :                 {
                               1726                 :                     /* Remember it */
                               1727         3798388 :                     itemIndex--;
 1138 pg                       1728 GIC     3798388 :                     _bt_saveitem(so, itemIndex, offnum, itup);
                               1729                 :                 }
 1138 pg                       1730 ECB             :                 else
                               1731                 :                 {
                               1732                 :                     int         tupleOffset;
                               1733                 : 
                               1734                 :                     /*
                               1735                 :                      * Set up state to return posting list, and remember first
                               1736                 :                      * TID.
                               1737                 :                      *
                               1738                 :                      * Note that we deliberately save/return items from
                               1739                 :                      * posting lists in ascending heap TID order for backwards
                               1740                 :                      * scans.  This allows _bt_killitems() to make a
                               1741                 :                      * consistent assumption about the order of items
                               1742                 :                      * associated with the same posting list tuple.
                               1743                 :                      */
 1138 pg                       1744 GIC       26080 :                     itemIndex--;
                               1745                 :                     tupleOffset =
                               1746           26080 :                         _bt_setuppostingitems(so, itemIndex, offnum,
 1138 pg                       1747 ECB             :                                               BTreeTupleGetPostingN(itup, 0),
                               1748                 :                                               itup);
                               1749                 :                     /* Remember additional TIDs */
 1138 pg                       1750 GIC       76589 :                     for (int i = 1; i < BTreeTupleGetNPosting(itup); i++)
                               1751                 :                     {
                               1752           50509 :                         itemIndex--;
 1138 pg                       1753 CBC       50509 :                         _bt_savepostingitem(so, itemIndex, offnum,
                               1754                 :                                             BTreeTupleGetPostingN(itup, i),
 1138 pg                       1755 ECB             :                                             tupleOffset);
                               1756                 :                     }
                               1757                 :                 }
                               1758                 :             }
 6181 tgl                      1759 GIC     3824689 :             if (!continuescan)
                               1760                 :             {
                               1761                 :                 /* there can't be any more matches, so stop */
 6181 tgl                      1762 CBC          42 :                 so->currPos.moreLeft = false;
 6181 tgl                      1763 GIC          42 :                 break;
                               1764                 :             }
 6181 tgl                      1765 ECB             : 
 6181 tgl                      1766 CBC     3824647 :             offnum = OffsetNumberPrev(offnum);
                               1767                 :         }
                               1768                 : 
                               1769           21605 :         Assert(itemIndex >= 0);
 6181 tgl                      1770 GIC       21605 :         so->currPos.firstItem = itemIndex;
 1138 pg                       1771           21605 :         so->currPos.lastItem = MaxTIDsPerBTreePage - 1;
 1138 pg                       1772 CBC       21605 :         so->currPos.itemIndex = MaxTIDsPerBTreePage - 1;
 9345 bruce                    1773 ECB             :     }
                               1774                 : 
 6181 tgl                      1775 CBC     8712708 :     return (so->currPos.firstItem <= so->currPos.lastItem);
                               1776                 : }
                               1777                 : 
 4200 tgl                      1778 ECB             : /* Save an index item into so->currPos.items[itemIndex] */
                               1779                 : static void
 4200 tgl                      1780 GIC    24070875 : _bt_saveitem(BTScanOpaque so, int itemIndex,
                               1781                 :              OffsetNumber offnum, IndexTuple itup)
                               1782                 : {
 4200 tgl                      1783 CBC    24070875 :     BTScanPosItem *currItem = &so->currPos.items[itemIndex];
                               1784                 : 
 1138 pg                       1785 GIC    24070875 :     Assert(!BTreeTupleIsPivot(itup) && !BTreeTupleIsPosting(itup));
 1138 pg                       1786 ECB             : 
 4200 tgl                      1787 GIC    24070875 :     currItem->heapTid = itup->t_tid;
 4200 tgl                      1788 CBC    24070875 :     currItem->indexOffset = offnum;
 4200 tgl                      1789 GIC    24070875 :     if (so->currTuples)
 4200 tgl                      1790 ECB             :     {
 4200 tgl                      1791 CBC     9999342 :         Size        itupsz = IndexTupleSize(itup);
 4200 tgl                      1792 ECB             : 
 4200 tgl                      1793 GIC     9999342 :         currItem->tupleOffset = so->currPos.nextTupleOffset;
 4200 tgl                      1794 CBC     9999342 :         memcpy(so->currTuples + so->currPos.nextTupleOffset, itup, itupsz);
 4200 tgl                      1795 GIC     9999342 :         so->currPos.nextTupleOffset += MAXALIGN(itupsz);
 4200 tgl                      1796 ECB             :     }
 4200 tgl                      1797 CBC    24070875 : }
 4200 tgl                      1798 ECB             : 
                               1799                 : /*
 1138 pg                       1800                 :  * Setup state to save TIDs/items from a single posting list tuple.
                               1801                 :  *
                               1802                 :  * Saves an index item into so->currPos.items[itemIndex] for TID that is
                               1803                 :  * returned to scan first.  Second or subsequent TIDs for posting list should
                               1804                 :  * be saved by calling _bt_savepostingitem().
                               1805                 :  *
                               1806                 :  * Returns an offset into tuple storage space that main tuple is stored at if
                               1807                 :  * needed.
                               1808                 :  */
                               1809                 : static int
 1138 pg                       1810 GIC      310488 : _bt_setuppostingitems(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
                               1811                 :                       ItemPointer heapTid, IndexTuple itup)
                               1812                 : {
 1138 pg                       1813 CBC      310488 :     BTScanPosItem *currItem = &so->currPos.items[itemIndex];
                               1814                 : 
 1138 pg                       1815 GIC      310488 :     Assert(BTreeTupleIsPosting(itup));
 1138 pg                       1816 ECB             : 
 1138 pg                       1817 GIC      310488 :     currItem->heapTid = *heapTid;
 1138 pg                       1818 CBC      310488 :     currItem->indexOffset = offnum;
 1138 pg                       1819 GIC      310488 :     if (so->currTuples)
 1138 pg                       1820 ECB             :     {
                               1821                 :         /* Save base IndexTuple (truncate posting list) */
                               1822                 :         IndexTuple  base;
 1138 pg                       1823 GIC      133414 :         Size        itupsz = BTreeTupleGetPostingOffset(itup);
                               1824                 : 
                               1825          133414 :         itupsz = MAXALIGN(itupsz);
 1138 pg                       1826 CBC      133414 :         currItem->tupleOffset = so->currPos.nextTupleOffset;
 1138 pg                       1827 GIC      133414 :         base = (IndexTuple) (so->currTuples + so->currPos.nextTupleOffset);
 1138 pg                       1828 CBC      133414 :         memcpy(base, itup, itupsz);
 1138 pg                       1829 ECB             :         /* Defensively reduce work area index tuple header size */
 1138 pg                       1830 CBC      133414 :         base->t_info &= ~INDEX_SIZE_MASK;
                               1831          133414 :         base->t_info |= itupsz;
 1138 pg                       1832 GIC      133414 :         so->currPos.nextTupleOffset += itupsz;
 1138 pg                       1833 ECB             : 
 1138 pg                       1834 CBC      133414 :         return currItem->tupleOffset;
 1138 pg                       1835 ECB             :     }
                               1836                 : 
 1138 pg                       1837 CBC      177074 :     return 0;
                               1838                 : }
                               1839                 : 
 1138 pg                       1840 ECB             : /*
                               1841                 :  * Save an index item into so->currPos.items[itemIndex] for current posting
                               1842                 :  * tuple.
                               1843                 :  *
                               1844                 :  * Assumes that _bt_setuppostingitems() has already been called for current
                               1845                 :  * posting list tuple.  Caller passes its return value as tupleOffset.
                               1846                 :  */
                               1847                 : static inline void
 1138 pg                       1848 GIC     1210254 : _bt_savepostingitem(BTScanOpaque so, int itemIndex, OffsetNumber offnum,
                               1849                 :                     ItemPointer heapTid, int tupleOffset)
                               1850                 : {
 1138 pg                       1851 CBC     1210254 :     BTScanPosItem *currItem = &so->currPos.items[itemIndex];
                               1852                 : 
 1138 pg                       1853 GIC     1210254 :     currItem->heapTid = *heapTid;
 1138 pg                       1854 CBC     1210254 :     currItem->indexOffset = offnum;
                               1855                 : 
 1138 pg                       1856 ECB             :     /*
                               1857                 :      * Have index-only scans return the same base IndexTuple for every TID
                               1858                 :      * that originates from the same posting list
                               1859                 :      */
 1138 pg                       1860 GIC     1210254 :     if (so->currTuples)
                               1861          465558 :         currItem->tupleOffset = tupleOffset;
                               1862         1210254 : }
 1138 pg                       1863 ECB             : 
 9770 scrappy                  1864                 : /*
 6181 tgl                      1865                 :  *  _bt_steppage() -- Step to next page containing valid data for scan
                               1866                 :  *
                               1867                 :  * On entry, if so->currPos.buf is valid the buffer is pinned but not locked;
                               1868                 :  * if pinned, we'll drop the pin before moving to next page.  The buffer is
                               1869                 :  * not locked on entry.
                               1870                 :  *
                               1871                 :  * For success on a scan using a non-MVCC snapshot we hold a pin, but not a
                               1872                 :  * read lock, on that page.  If we do not hold the pin, we set so->currPos.buf
                               1873                 :  * to InvalidBuffer.  We return true to indicate success.
                               1874                 :  */
                               1875                 : static bool
 6181 tgl                      1876 GIC     4851051 : _bt_steppage(IndexScanDesc scan, ScanDirection dir)
                               1877                 : {
 8297                          1878         4851051 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 2244 rhaas                    1879 CBC     4851051 :     BlockNumber blkno = InvalidBlockNumber;
                               1880                 :     bool        status;
 8720 bruce                    1881 ECB             : 
 2937 kgrittn                  1882 CBC     4851051 :     Assert(BTScanPosIsValid(so->currPos));
                               1883                 : 
                               1884                 :     /* Before leaving current page, deal with any killed items */
 6181 tgl                      1885         4851051 :     if (so->numKilled > 0)
 2937 kgrittn                  1886 GIC       39535 :         _bt_killitems(scan);
                               1887                 : 
 6072 tgl                      1888 ECB             :     /*
 6031 bruce                    1889                 :      * Before we modify currPos, make a copy of the page data if there was a
                               1890                 :      * mark position that needs it.
                               1891                 :      */
 6072 tgl                      1892 GIC     4851051 :     if (so->markItemIndex >= 0)
                               1893                 :     {
                               1894                 :         /* bump pin on current buffer for assignment to mark buffer */
 2937 kgrittn                  1895 CBC         183 :         if (BTScanPosIsPinned(so->currPos))
 2937 kgrittn                  1896 GIC         175 :             IncrBufferRefCount(so->currPos.buf);
 6072 tgl                      1897             183 :         memcpy(&so->markPos, &so->currPos,
 6072 tgl                      1898 ECB             :                offsetof(BTScanPosData, items[1]) +
 6072 tgl                      1899 CBC         183 :                so->currPos.lastItem * sizeof(BTScanPosItem));
 4200                          1900             183 :         if (so->markTuples)
 4200 tgl                      1901 GIC         174 :             memcpy(so->markTuples, so->currTuples,
 4200 tgl                      1902 CBC         174 :                    so->currPos.nextTupleOffset);
 6072                          1903             183 :         so->markPos.itemIndex = so->markItemIndex;
                               1904             183 :         so->markItemIndex = -1;
 6072 tgl                      1905 ECB             :     }
                               1906                 : 
 9345 bruce                    1907 CBC     4851051 :     if (ScanDirectionIsForward(dir))
                               1908                 :     {
                               1909                 :         /* Walk right to the next page with data */
 2244 rhaas                    1910         4851005 :         if (scan->parallel_scan != NULL)
                               1911                 :         {
                               1912                 :             /*
 2244 rhaas                    1913 ECB             :              * Seize the scan to get the next block number; if the scan has
                               1914                 :              * ended already, bail out.
                               1915                 :              */
 2244 rhaas                    1916 GIC         644 :             status = _bt_parallel_seize(scan, &blkno);
                               1917             644 :             if (!status)
                               1918                 :             {
 2244 rhaas                    1919 ECB             :                 /* release the previous buffer, if pinned */
 2244 rhaas                    1920 CBC           6 :                 BTScanPosUnpinIfPinned(so->currPos);
 2244 rhaas                    1921 GIC           6 :                 BTScanPosInvalidate(so->currPos);
                               1922               6 :                 return false;
 2244 rhaas                    1923 ECB             :             }
                               1924                 :         }
                               1925                 :         else
                               1926                 :         {
                               1927                 :             /* Not parallel, so use the previously-saved nextPage link. */
 2244 rhaas                    1928 GIC     4850361 :             blkno = so->currPos.nextPage;
                               1929                 :         }
                               1930                 : 
 6181 tgl                      1931 ECB             :         /* Remember we left a page with data */
 6181 tgl                      1932 GIC     4850999 :         so->currPos.moreLeft = true;
                               1933                 : 
                               1934                 :         /* release the previous buffer, if pinned */
 2937 kgrittn                  1935 CBC     4850999 :         BTScanPosUnpinIfPinned(so->currPos);
                               1936                 :     }
                               1937                 :     else
 2244 rhaas                    1938 ECB             :     {
                               1939                 :         /* Remember we left a page with data */
 2244 rhaas                    1940 GIC          46 :         so->currPos.moreRight = true;
                               1941                 : 
                               1942              46 :         if (scan->parallel_scan != NULL)
 2244 rhaas                    1943 ECB             :         {
                               1944                 :             /*
                               1945                 :              * Seize the scan to get the current block number; if the scan has
                               1946                 :              * ended already, bail out.
                               1947                 :              */
 2244 rhaas                    1948 UIC           0 :             status = _bt_parallel_seize(scan, &blkno);
                               1949               0 :             BTScanPosUnpinIfPinned(so->currPos);
                               1950               0 :             if (!status)
 2244 rhaas                    1951 EUB             :             {
 2244 rhaas                    1952 UBC           0 :                 BTScanPosInvalidate(so->currPos);
                               1953               0 :                 return false;
                               1954                 :             }
 2244 rhaas                    1955 EUB             :         }
                               1956                 :         else
                               1957                 :         {
                               1958                 :             /* Not parallel, so just use our own notion of the current page */
 2244 rhaas                    1959 GIC          46 :             blkno = so->currPos.currPage;
                               1960                 :         }
                               1961                 :     }
 2244 rhaas                    1962 ECB             : 
 2244 rhaas                    1963 GIC     4851045 :     if (!_bt_readnextpage(scan, blkno, dir))
                               1964         4836686 :         return false;
                               1965                 : 
 2244 rhaas                    1966 ECB             :     /* Drop the lock, and maybe the pin, on the current page */
 2244 rhaas                    1967 CBC       14359 :     _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
                               1968                 : 
 2244 rhaas                    1969 GIC       14359 :     return true;
 2244 rhaas                    1970 ECB             : }
                               1971                 : 
                               1972                 : /*
                               1973                 :  *  _bt_readnextpage() -- Read next page containing valid data for scan
                               1974                 :  *
                               1975                 :  * On success exit, so->currPos is updated to contain data from the next
                               1976                 :  * interesting page.  Caller is responsible to release lock and pin on
                               1977                 :  * buffer on success.  We return true to indicate success.
                               1978                 :  *
                               1979                 :  * If there are no more matching records in the given direction, we drop all
                               1980                 :  * locks and pins, set so->currPos.buf to InvalidBuffer, and return false.
                               1981                 :  */
                               1982                 : static bool
 2244 rhaas                    1983 GIC     4851049 : _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
                               1984                 : {
                               1985         4851049 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
 2244 rhaas                    1986 ECB             :     Relation    rel;
                               1987                 :     Page        page;
                               1988                 :     BTPageOpaque opaque;
                               1989                 :     bool        status;
                               1990                 : 
 2244 rhaas                    1991 GIC     4851049 :     rel = scan->indexRelation;
                               1992                 : 
                               1993         4851049 :     if (ScanDirectionIsForward(dir))
 2244 rhaas                    1994 ECB             :     {
                               1995                 :         for (;;)
 9345 bruce                    1996                 :         {
                               1997                 :             /*
                               1998                 :              * if we're at end of scan, give up and mark parallel scan as
                               1999                 :              * done, so that all the workers can finish their scan
                               2000                 :              */
 6181 tgl                      2001 GIC     4851755 :             if (blkno == P_NONE || !so->currPos.moreRight)
                               2002                 :             {
 2244 rhaas                    2003         4836652 :                 _bt_parallel_done(scan);
 2937 kgrittn                  2004 CBC     4836652 :                 BTScanPosInvalidate(so->currPos);
 6181 tgl                      2005 GIC     4836652 :                 return false;
 2937 kgrittn                  2006 ECB             :             }
 5087 tgl                      2007                 :             /* check for interrupts while we're not holding any buffer lock */
 5087 tgl                      2008 CBC       15103 :             CHECK_FOR_INTERRUPTS();
                               2009                 :             /* step right one page */
    8 andres                   2010 GNC       15103 :             so->currPos.buf = _bt_getbuf(rel, scan->heapRelation, blkno, BT_READ);
 2545 kgrittn                  2011 CBC       15103 :             page = BufferGetPage(so->currPos.buf);
 2545 kgrittn                  2012 GIC       15103 :             TestForOldSnapshot(scan->xs_snapshot, rel, page);
  373 michael                  2013 CBC       15103 :             opaque = BTPageGetOpaque(page);
 2259 rhaas                    2014 ECB             :             /* check for deleted page */
 6181 tgl                      2015 CBC       15103 :             if (!P_IGNORE(opaque))
 9345 bruce                    2016 ECB             :             {
 4316 heikki.linnakangas       2017 GIC       15103 :                 PredicateLockPage(rel, blkno, scan->xs_snapshot);
 6181 tgl                      2018 ECB             :                 /* see if there are any matches on this page */
                               2019                 :                 /* note that this will clear moreRight if we can stop */
 6181 tgl                      2020 CBC       15103 :                 if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
 6181 tgl                      2021 GIC       14351 :                     break;
                               2022                 :             }
 1943 rhaas                    2023 LBC           0 :             else if (scan->parallel_scan != NULL)
 1943 rhaas                    2024 ECB             :             {
                               2025                 :                 /* allow next page be processed by parallel worker */
 1943 rhaas                    2026 UBC           0 :                 _bt_parallel_release(scan, opaque->btpo_next);
                               2027                 :             }
                               2028                 : 
 6181 tgl                      2029 EUB             :             /* nope, keep going */
 2244 rhaas                    2030 GIC         752 :             if (scan->parallel_scan != NULL)
                               2031                 :             {
 1717 akapila                  2032 UIC           0 :                 _bt_relbuf(rel, so->currPos.buf);
 2244 rhaas                    2033 LBC           0 :                 status = _bt_parallel_seize(scan, &blkno);
 2244 rhaas                    2034 UIC           0 :                 if (!status)
 2244 rhaas                    2035 EUB             :                 {
 2244 rhaas                    2036 UBC           0 :                     BTScanPosInvalidate(so->currPos);
                               2037               0 :                     return false;
                               2038                 :                 }
 2244 rhaas                    2039 EUB             :             }
                               2040                 :             else
                               2041                 :             {
 2244 rhaas                    2042 GIC         752 :                 blkno = opaque->btpo_next;
 1717 akapila                  2043             752 :                 _bt_relbuf(rel, so->currPos.buf);
                               2044                 :             }
 9770 scrappy                  2045 ECB             :         }
                               2046                 :     }
                               2047                 :     else
                               2048                 :     {
                               2049                 :         /*
                               2050                 :          * Should only happen in parallel cases, when some other backend
                               2051                 :          * advanced the scan.
                               2052                 :          */
 2244 rhaas                    2053 GIC          46 :         if (so->currPos.currPage != blkno)
                               2054                 :         {
 2244 rhaas                    2055 UIC           0 :             BTScanPosUnpinIfPinned(so->currPos);
 2244 rhaas                    2056 LBC           0 :             so->currPos.currPage = blkno;
                               2057                 :         }
 6181 tgl                      2058 EUB             : 
                               2059                 :         /*
                               2060                 :          * Walk left to the next page with data.  This is much more complex
                               2061                 :          * than the walk-right case because of the possibility that the page
                               2062                 :          * to our left splits while we are in flight to it, plus the
                               2063                 :          * possibility that the page we were on gets deleted after we leave
                               2064                 :          * it.  See nbtree/README for details.
                               2065                 :          *
                               2066                 :          * It might be possible to rearrange this code to have less overhead
                               2067                 :          * in pinning and locking, but that would require capturing the left
                               2068                 :          * pointer when the page is initially read, and using it here, along
                               2069                 :          * with big changes to _bt_walk_left() and the code below.  It is not
                               2070                 :          * clear whether this would be a win, since if the page immediately to
                               2071                 :          * the left splits after we read this page and before we step left, we
                               2072                 :          * would need to visit more pages than with the current code.
                               2073                 :          *
                               2074                 :          * Note that if we change the code so that we drop the pin for a scan
                               2075                 :          * which uses a non-MVCC snapshot, we will need to modify the code for
                               2076                 :          * walking left, to allow for the possibility that a referenced page
                               2077                 :          * has been deleted.  As long as the buffer is pinned or the snapshot
                               2078                 :          * is MVCC the page cannot move past the half-dead state to fully
                               2079                 :          * deleted.
                               2080                 :          */
 2937 kgrittn                  2081 GIC          46 :         if (BTScanPosIsPinned(so->currPos))
  992 pg                       2082              24 :             _bt_lockbuf(rel, so->currPos.buf, BT_READ);
                               2083                 :         else
    8 andres                   2084 GNC          22 :             so->currPos.buf = _bt_getbuf(rel, scan->heapRelation,
                               2085                 :                                          so->currPos.currPage, BT_READ);
 2937 kgrittn                  2086 ECB             : 
                               2087                 :         for (;;)
 9345 bruce                    2088                 :         {
                               2089                 :             /* Done if we know there are no matching keys to the left */
 6181 tgl                      2090 GIC          48 :             if (!so->currPos.moreLeft)
                               2091                 :             {
                               2092              21 :                 _bt_relbuf(rel, so->currPos.buf);
 2244 rhaas                    2093              21 :                 _bt_parallel_done(scan);
 2937 kgrittn                  2094 CBC          21 :                 BTScanPosInvalidate(so->currPos);
 6181 tgl                      2095 GIC          21 :                 return false;
 6181 tgl                      2096 ECB             :             }
 7351                          2097                 : 
 6181                          2098                 :             /* Step to next physical page */
    8 andres                   2099 GNC          27 :             so->currPos.buf = _bt_walk_left(rel, scan->heapRelation,
                               2100                 :                                             so->currPos.buf, scan->xs_snapshot);
                               2101                 : 
                               2102                 :             /* if we're physically at end of index, return failure */
 6181 tgl                      2103 CBC          27 :             if (so->currPos.buf == InvalidBuffer)
                               2104                 :             {
 2244 rhaas                    2105 GIC          13 :                 _bt_parallel_done(scan);
 2937 kgrittn                  2106              13 :                 BTScanPosInvalidate(so->currPos);
 6181 tgl                      2107 CBC          13 :                 return false;
                               2108                 :             }
 6181 tgl                      2109 ECB             : 
                               2110                 :             /*
 6031 bruce                    2111                 :              * Okay, we managed to move left to a non-deleted page. Done if
                               2112                 :              * it's not half-dead and contains matching tuples. Else loop back
                               2113                 :              * and do it all again.
                               2114                 :              */
 2545 kgrittn                  2115 GIC          14 :             page = BufferGetPage(so->currPos.buf);
                               2116              14 :             TestForOldSnapshot(scan->xs_snapshot, rel, page);
  373 michael                  2117              14 :             opaque = BTPageGetOpaque(page);
 6181 tgl                      2118              14 :             if (!P_IGNORE(opaque))
 6181 tgl                      2119 ECB             :             {
 4316 heikki.linnakangas       2120 CBC          14 :                 PredicateLockPage(rel, BufferGetBlockNumber(so->currPos.buf), scan->xs_snapshot);
 6181 tgl                      2121 ECB             :                 /* see if there are any matches on this page */
                               2122                 :                 /* note that this will clear moreLeft if we can stop */
 6181 tgl                      2123 GIC          14 :                 if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
 6181 tgl                      2124 CBC          12 :                     break;
                               2125                 :             }
 1943 rhaas                    2126 UIC           0 :             else if (scan->parallel_scan != NULL)
 1943 rhaas                    2127 ECB             :             {
                               2128                 :                 /* allow next page be processed by parallel worker */
 1943 rhaas                    2129 UIC           0 :                 _bt_parallel_release(scan, BufferGetBlockNumber(so->currPos.buf));
 1943 rhaas                    2130 EUB             :             }
                               2131                 : 
                               2132                 :             /*
 2244                          2133                 :              * For parallel scans, get the last page scanned as it is quite
                               2134                 :              * possible that by the time we try to seize the scan, some other
                               2135                 :              * worker has already advanced the scan to a different page.  We
                               2136                 :              * must continue based on the latest page scanned by any worker.
                               2137                 :              */
 2244 rhaas                    2138 GIC           2 :             if (scan->parallel_scan != NULL)
                               2139                 :             {
 2244 rhaas                    2140 UIC           0 :                 _bt_relbuf(rel, so->currPos.buf);
                               2141               0 :                 status = _bt_parallel_seize(scan, &blkno);
 2244 rhaas                    2142 LBC           0 :                 if (!status)
                               2143                 :                 {
 2244 rhaas                    2144 UBC           0 :                     BTScanPosInvalidate(so->currPos);
                               2145               0 :                     return false;
 2244 rhaas                    2146 EUB             :                 }
    8 andres                   2147 UNC           0 :                 so->currPos.buf = _bt_getbuf(rel, scan->heapRelation, blkno,
                               2148                 :                                              BT_READ);
 2244 rhaas                    2149 EUB             :             }
 9770 scrappy                  2150                 :         }
                               2151                 :     }
 8297 tgl                      2152                 : 
 2244 rhaas                    2153 GIC       14363 :     return true;
                               2154                 : }
                               2155                 : 
                               2156                 : /*
                               2157                 :  *  _bt_parallel_readpage() -- Read current page containing valid data for scan
 2244 rhaas                    2158 ECB             :  *
                               2159                 :  * On success, release lock and maybe pin on buffer.  We return true to
                               2160                 :  * indicate success.
                               2161                 :  */
                               2162                 : static bool
 2244 rhaas                    2163 GIC           4 : _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno, ScanDirection dir)
                               2164                 : {
                               2165               4 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                               2166                 : 
                               2167               4 :     _bt_initialize_more_data(so, dir);
 2244 rhaas                    2168 ECB             : 
 2244 rhaas                    2169 GIC           4 :     if (!_bt_readnextpage(scan, blkno, dir))
 2244 rhaas                    2170 LBC           0 :         return false;
                               2171                 : 
 2937 kgrittn                  2172 ECB             :     /* Drop the lock, and maybe the pin, on the current page */
 2937 kgrittn                  2173 GIC           4 :     _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
 2937 kgrittn                  2174 ECB             : 
 8986 bruce                    2175 GBC           4 :     return true;
                               2176                 : }
                               2177                 : 
 7351 tgl                      2178 ECB             : /*
                               2179                 :  * _bt_walk_left() -- step left one page, if possible
                               2180                 :  *
                               2181                 :  * The given buffer must be pinned and read-locked.  This will be dropped
                               2182                 :  * before stepping left.  On return, we have pin and read lock on the
                               2183                 :  * returned page, instead.
                               2184                 :  *
                               2185                 :  * Returns InvalidBuffer if there is no page to the left (no lock is held
                               2186                 :  * in that case).
                               2187                 :  *
                               2188                 :  * When working on a non-leaf level, it is possible for the returned page
                               2189                 :  * to be half-dead; the caller should check that condition and step left
                               2190                 :  * again if it's important.
                               2191                 :  */
                               2192                 : static Buffer
    8 andres                   2193 GNC          27 : _bt_walk_left(Relation rel, Relation heaprel, Buffer buf, Snapshot snapshot)
                               2194                 : {
                               2195                 :     Page        page;
                               2196                 :     BTPageOpaque opaque;
                               2197                 : 
 2545 kgrittn                  2198 CBC          27 :     page = BufferGetPage(buf);
  373 michael                  2199 GIC          27 :     opaque = BTPageGetOpaque(page);
                               2200                 : 
                               2201                 :     for (;;)
 7351 tgl                      2202 UIC           0 :     {
 7351 tgl                      2203 ECB             :         BlockNumber obknum;
                               2204                 :         BlockNumber lblkno;
                               2205                 :         BlockNumber blkno;
                               2206                 :         int         tries;
 7351 tgl                      2207 EUB             : 
                               2208                 :         /* if we're at end of tree, release buf and return failure */
 7351 tgl                      2209 GIC          27 :         if (P_LEFTMOST(opaque))
                               2210                 :         {
                               2211              13 :             _bt_relbuf(rel, buf);
                               2212              13 :             break;
                               2213                 :         }
 7351 tgl                      2214 ECB             :         /* remember original page we are stepping left from */
 7351 tgl                      2215 GIC          14 :         obknum = BufferGetBlockNumber(buf);
 7351 tgl                      2216 ECB             :         /* step left */
 7351 tgl                      2217 CBC          14 :         blkno = lblkno = opaque->btpo_prev;
 5087 tgl                      2218 GIC          14 :         _bt_relbuf(rel, buf);
                               2219                 :         /* check for interrupts while we're not holding any buffer lock */
 5087 tgl                      2220 CBC          14 :         CHECK_FOR_INTERRUPTS();
    8 andres                   2221 GNC          14 :         buf = _bt_getbuf(rel, heaprel, blkno, BT_READ);
 2545 kgrittn                  2222 CBC          14 :         page = BufferGetPage(buf);
                               2223              14 :         TestForOldSnapshot(snapshot, rel, page);
  373 michael                  2224 GIC          14 :         opaque = BTPageGetOpaque(page);
 7188 bruce                    2225 ECB             : 
 7351 tgl                      2226                 :         /*
 7188 bruce                    2227                 :          * If this isn't the page we want, walk right till we find what we
 6385                          2228                 :          * want --- but go no more than four hops (an arbitrary limit). If we
                               2229                 :          * don't find the correct page by then, the most likely bet is that
                               2230                 :          * the original page got deleted and isn't in the sibling chain at all
                               2231                 :          * anymore, not that its left sibling got split more than four times.
                               2232                 :          *
                               2233                 :          * Note that it is correct to test P_ISDELETED not P_IGNORE here,
                               2234                 :          * because half-dead pages are still in the sibling chain.  Caller
                               2235                 :          * must reject half-dead pages if wanted.
                               2236                 :          */
 7351 tgl                      2237 GIC          14 :         tries = 0;
                               2238                 :         for (;;)
                               2239                 :         {
                               2240              14 :             if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum)
                               2241                 :             {
 7351 tgl                      2242 ECB             :                 /* Found desired page, return it */
 7351 tgl                      2243 GIC          14 :                 return buf;
                               2244                 :             }
 7351 tgl                      2245 LBC           0 :             if (P_RIGHTMOST(opaque) || ++tries > 4)
                               2246                 :                 break;
 7351 tgl                      2247 UIC           0 :             blkno = opaque->btpo_next;
 6927 tgl                      2248 LBC           0 :             buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
 2545 kgrittn                  2249 UIC           0 :             page = BufferGetPage(buf);
 2545 kgrittn                  2250 UBC           0 :             TestForOldSnapshot(snapshot, rel, page);
  373 michael                  2251 UIC           0 :             opaque = BTPageGetOpaque(page);
 7351 tgl                      2252 EUB             :         }
                               2253                 : 
                               2254                 :         /* Return to the original page to see what's up */
 6927 tgl                      2255 UBC           0 :         buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
 2545 kgrittn                  2256               0 :         page = BufferGetPage(buf);
 2545 kgrittn                  2257 UIC           0 :         TestForOldSnapshot(snapshot, rel, page);
  373 michael                  2258               0 :         opaque = BTPageGetOpaque(page);
 7351 tgl                      2259               0 :         if (P_ISDELETED(opaque))
 7351 tgl                      2260 EUB             :         {
                               2261                 :             /*
 3260 bruce                    2262                 :              * It was deleted.  Move right to first nondeleted page (there
 6385                          2263                 :              * must be one); that is the page that has acquired the deleted
                               2264                 :              * one's keyspace, so stepping left from it will take us where we
                               2265                 :              * want to be.
                               2266                 :              */
                               2267                 :             for (;;)
                               2268                 :             {
 7351 tgl                      2269 UIC           0 :                 if (P_RIGHTMOST(opaque))
 5578                          2270               0 :                     elog(ERROR, "fell off the end of index \"%s\"",
                               2271                 :                          RelationGetRelationName(rel));
 7351                          2272               0 :                 blkno = opaque->btpo_next;
 6927                          2273               0 :                 buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
 2545 kgrittn                  2274 UBC           0 :                 page = BufferGetPage(buf);
                               2275               0 :                 TestForOldSnapshot(snapshot, rel, page);
  373 michael                  2276 UIC           0 :                 opaque = BTPageGetOpaque(page);
 7351 tgl                      2277 UBC           0 :                 if (!P_ISDELETED(opaque))
                               2278               0 :                     break;
 7351 tgl                      2279 EUB             :             }
 7188 bruce                    2280                 : 
 7351 tgl                      2281                 :             /*
 6385 bruce                    2282                 :              * Now return to top of loop, resetting obknum to point to this
                               2283                 :              * nondeleted page, and try again.
                               2284                 :              */
                               2285                 :         }
                               2286                 :         else
                               2287                 :         {
                               2288                 :             /*
                               2289                 :              * It wasn't deleted; the explanation had better be that the page
                               2290                 :              * to the left got split or deleted. Without this check, we'd go
                               2291                 :              * into an infinite loop if there's anything wrong.
                               2292                 :              */
 7351 tgl                      2293 UIC           0 :             if (opaque->btpo_prev == lblkno)
 5578                          2294               0 :                 elog(ERROR, "could not find left sibling of block %u in index \"%s\"",
                               2295                 :                      obknum, RelationGetRelationName(rel));
                               2296                 :             /* Okay to try again with new lblkno value */
                               2297                 :         }
 7351 tgl                      2298 EUB             :     }
                               2299                 : 
 7351 tgl                      2300 GIC          13 :     return InvalidBuffer;
                               2301                 : }
                               2302                 : 
                               2303                 : /*
                               2304                 :  * _bt_get_endpoint() -- Find the first or last page on a given tree level
 7352 tgl                      2305 ECB             :  *
                               2306                 :  * If the index is empty, we will return InvalidBuffer; any other failure
                               2307                 :  * condition causes ereport().  We will not return a dead page.
                               2308                 :  *
                               2309                 :  * The returned buffer is pinned and read-locked.
                               2310                 :  */
                               2311                 : Buffer
    8 andres                   2312 GNC       31969 : _bt_get_endpoint(Relation rel, Relation heaprel, uint32 level, bool rightmost,
                               2313                 :                  Snapshot snapshot)
                               2314                 : {
                               2315                 :     Buffer      buf;
                               2316                 :     Page        page;
 7352 tgl                      2317 ECB             :     BTPageOpaque opaque;
                               2318                 :     OffsetNumber offnum;
                               2319                 :     BlockNumber blkno;
                               2320                 :     IndexTuple  itup;
                               2321                 : 
                               2322                 :     /*
                               2323                 :      * If we are looking for a leaf page, okay to descend from fast root;
                               2324                 :      * otherwise better descend from true root.  (There is no point in being
                               2325                 :      * smarter about intermediate levels.)
                               2326                 :      */
 7352 tgl                      2327 GIC       31969 :     if (level == 0)
    8 andres                   2328 GNC       31957 :         buf = _bt_getroot(rel, heaprel, BT_READ);
                               2329                 :     else
                               2330              12 :         buf = _bt_gettrueroot(rel, heaprel);
                               2331                 : 
 7352 tgl                      2332 CBC       31969 :     if (!BufferIsValid(buf))
                               2333            3197 :         return InvalidBuffer;
                               2334                 : 
 2545 kgrittn                  2335           28772 :     page = BufferGetPage(buf);
 2545 kgrittn                  2336 GIC       28772 :     TestForOldSnapshot(snapshot, rel, page);
  373 michael                  2337 CBC       28772 :     opaque = BTPageGetOpaque(page);
 7352 tgl                      2338 ECB             : 
                               2339                 :     for (;;)
                               2340                 :     {
                               2341                 :         /*
                               2342                 :          * If we landed on a deleted page, step right to find a live page
                               2343                 :          * (there must be one).  Also, if we want the rightmost page, step
                               2344                 :          * right if needed to get to it (this could happen if the page split
                               2345                 :          * since we obtained a pointer to it).
                               2346                 :          */
 7351 tgl                      2347 GIC       42145 :         while (P_IGNORE(opaque) ||
 7352                          2348              27 :                (rightmost && !P_RIGHTMOST(opaque)))
                               2349                 :         {
 7352 tgl                      2350 UIC           0 :             blkno = opaque->btpo_next;
                               2351               0 :             if (blkno == P_NONE)
 5578 tgl                      2352 LBC           0 :                 elog(ERROR, "fell off the end of index \"%s\"",
 7351 tgl                      2353 ECB             :                      RelationGetRelationName(rel));
 6927 tgl                      2354 UIC           0 :             buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
 2545 kgrittn                  2355 UBC           0 :             page = BufferGetPage(buf);
                               2356               0 :             TestForOldSnapshot(snapshot, rel, page);
  373 michael                  2357               0 :             opaque = BTPageGetOpaque(page);
                               2358                 :         }
 7352 tgl                      2359 EUB             : 
                               2360                 :         /* Done? */
  774 pg                       2361 GBC       42145 :         if (opaque->btpo_level == level)
 7352 tgl                      2362           28772 :             break;
  774 pg                       2363 GIC       13373 :         if (opaque->btpo_level < level)
 1347 peter                    2364 UIC           0 :             ereport(ERROR,
                               2365                 :                     (errcode(ERRCODE_INDEX_CORRUPTED),
 1347 peter                    2366 ECB             :                      errmsg_internal("btree level %u not found in index \"%s\"",
                               2367                 :                                      level, RelationGetRelationName(rel))));
 7352 tgl                      2368                 : 
 7351 tgl                      2369 EUB             :         /* Descend to leftmost or rightmost child page */
 7352 tgl                      2370 GIC       13373 :         if (rightmost)
                               2371               3 :             offnum = PageGetMaxOffsetNumber(page);
                               2372                 :         else
                               2373           13370 :             offnum = P_FIRSTDATAKEY(opaque);
                               2374                 : 
 6283 tgl                      2375 CBC       13373 :         itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
 1210 pg                       2376           13373 :         blkno = BTreeTupleGetDownLink(itup);
                               2377                 : 
 6927 tgl                      2378           13373 :         buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
 2545 kgrittn                  2379 GIC       13373 :         page = BufferGetPage(buf);
  373 michael                  2380 CBC       13373 :         opaque = BTPageGetOpaque(page);
 7352 tgl                      2381 ECB             :     }
                               2382                 : 
 7352 tgl                      2383 CBC       28772 :     return buf;
 7352 tgl                      2384 ECB             : }
                               2385                 : 
                               2386                 : /*
                               2387                 :  *  _bt_endpoint() -- Find the first or last page in the index, and scan
 7088                          2388                 :  * from there to the first key satisfying all the quals.
                               2389                 :  *
                               2390                 :  * This is used by _bt_first() to set up a scan when we've determined
                               2391                 :  * that the scan must start at the beginning or end of the index (for
                               2392                 :  * a forward or backward scan respectively).  Exit conditions are the
                               2393                 :  * same as for _bt_first().
                               2394                 :  */
                               2395                 : static bool
 9770 scrappy                  2396 GIC       31957 : _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
                               2397                 : {
 6181 tgl                      2398           31957 :     Relation    rel = scan->indexRelation;
                               2399           31957 :     BTScanOpaque so = (BTScanOpaque) scan->opaque;
                               2400                 :     Buffer      buf;
 9344 bruce                    2401 ECB             :     Page        page;
                               2402                 :     BTPageOpaque opaque;
 8297 tgl                      2403                 :     OffsetNumber start;
 4200                          2404                 :     BTScanPosItem *currItem;
                               2405                 : 
                               2406                 :     /*
                               2407                 :      * Scan down to the leftmost or rightmost leaf page.  This is a simplified
                               2408                 :      * version of _bt_search().  We don't maintain a stack since we know we
                               2409                 :      * won't need it.
                               2410                 :      */
    8 andres                   2411 GNC       31957 :     buf = _bt_get_endpoint(rel, scan->heapRelation, 0,
                               2412                 :                            ScanDirectionIsBackward(dir), scan->xs_snapshot);
                               2413                 : 
 8053 bruce                    2414 GIC       31957 :     if (!BufferIsValid(buf))
                               2415                 :     {
                               2416                 :         /*
 4316 heikki.linnakangas       2417 ECB             :          * Empty index. Lock the whole relation, as nothing finer to lock
                               2418                 :          * exists.
                               2419                 :          */
 4316 heikki.linnakangas       2420 CBC        3197 :         PredicateLockRelation(rel, scan->xs_snapshot);
 2937 kgrittn                  2421 GIC        3197 :         BTScanPosInvalidate(so->currPos);
 7629 tgl                      2422            3197 :         return false;
                               2423                 :     }
                               2424                 : 
 4316 heikki.linnakangas       2425           28760 :     PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
 2545 kgrittn                  2426 CBC       28760 :     page = BufferGetPage(buf);
  373 michael                  2427           28760 :     opaque = BTPageGetOpaque(page);
 7352 tgl                      2428           28760 :     Assert(P_ISLEAF(opaque));
                               2429                 : 
 9345 bruce                    2430 GIC       28760 :     if (ScanDirectionIsForward(dir))
 9345 bruce                    2431 ECB             :     {
 7352 tgl                      2432                 :         /* There could be dead pages to the left, so not this: */
                               2433                 :         /* Assert(P_LEFTMOST(opaque)); */
 9345 bruce                    2434                 : 
 8297 tgl                      2435 GIC       28736 :         start = P_FIRSTDATAKEY(opaque);
 9345 bruce                    2436 ECB             :     }
 9345 bruce                    2437 GIC          24 :     else if (ScanDirectionIsBackward(dir))
                               2438                 :     {
 8297 tgl                      2439              24 :         Assert(P_RIGHTMOST(opaque));
                               2440                 : 
 8297 tgl                      2441 CBC          24 :         start = PageGetMaxOffsetNumber(page);
                               2442                 :     }
 9345 bruce                    2443 ECB             :     else
                               2444                 :     {
 7202 tgl                      2445 LBC           0 :         elog(ERROR, "invalid scan direction: %d", (int) dir);
                               2446                 :         start = 0;              /* keep compiler quiet */
 8297 tgl                      2447 ECB             :     }
                               2448                 : 
                               2449                 :     /* remember which buffer we have pinned */
 6181 tgl                      2450 GIC       28760 :     so->currPos.buf = buf;
 8297 tgl                      2451 EUB             : 
 2244 rhaas                    2452 GIC       28760 :     _bt_initialize_more_data(so, dir);
                               2453                 : 
                               2454                 :     /*
                               2455                 :      * Now load data from the first page of the scan.
 7088 tgl                      2456 ECB             :      */
 6181 tgl                      2457 GIC       28760 :     if (!_bt_readpage(scan, dir, start))
 9620 vadim4o                  2458 ECB             :     {
                               2459                 :         /*
                               2460                 :          * There's no actually-matching data on this page.  Try to advance to
                               2461                 :          * the next page.  Return false if there's no matching data at all.
                               2462                 :          */
  992 pg                       2463 CBC         820 :         _bt_unlockbuf(scan->indexRelation, so->currPos.buf);
 6181 tgl                      2464 GIC         820 :         if (!_bt_steppage(scan, dir))
                               2465             781 :             return false;
                               2466                 :     }
                               2467                 :     else
                               2468                 :     {
 2937 kgrittn                  2469 ECB             :         /* Drop the lock, and maybe the pin, on the current page */
 2937 kgrittn                  2470 CBC       27940 :         _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
 2937 kgrittn                  2471 ECB             :     }
                               2472                 : 
                               2473                 :     /* OK, itemIndex says what to return */
 4200 tgl                      2474 GIC       27979 :     currItem = &so->currPos.items[so->currPos.itemIndex];
 1490 andres                   2475           27979 :     scan->xs_heaptid = currItem->heapTid;
 4200 tgl                      2476 CBC       27979 :     if (scan->xs_want_itup)
 4200 tgl                      2477 GIC       25940 :         scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
                               2478                 : 
 6181                          2479           27979 :     return true;
 9770 scrappy                  2480 ECB             : }
 2244 rhaas                    2481                 : 
                               2482                 : /*
                               2483                 :  * _bt_initialize_more_data() -- initialize moreLeft/moreRight appropriately
                               2484                 :  * for scan direction
                               2485                 :  */
                               2486                 : static inline void
 2244 rhaas                    2487 GIC     8697595 : _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
                               2488                 : {
                               2489                 :     /* initialize moreLeft/moreRight appropriately for scan direction */
                               2490         8697595 :     if (ScanDirectionIsForward(dir))
                               2491                 :     {
                               2492         8676004 :         so->currPos.moreLeft = false;
 2244 rhaas                    2493 CBC     8676004 :         so->currPos.moreRight = true;
                               2494                 :     }
                               2495                 :     else
 2244 rhaas                    2496 ECB             :     {
 2244 rhaas                    2497 GIC       21591 :         so->currPos.moreLeft = true;
 2244 rhaas                    2498 CBC       21591 :         so->currPos.moreRight = false;
 2244 rhaas                    2499 ECB             :     }
 2244 rhaas                    2500 GIC     8697595 :     so->numKilled = 0;           /* just paranoia */
                               2501         8697595 :     so->markItemIndex = -1;      /* ditto */
                               2502         8697595 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a