LCOV - differential code coverage report
Current view: top level - src/include/access - nbtree.h (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage 16@8cea358b128 vs 17@8cea358b128 Lines: 99.0 % 96 95 1 95
Current Date: 2024-04-14 14:21:10 Functions: 100.0 % 17 17 17
Baseline: 16@8cea358b128 Branches: 70.0 % 60 42 18 42
Baseline Date: 2024-04-14 14:21:09 Line coverage date bins:
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed (240..) days: 99.0 % 96 95 1 95
Function coverage date bins:
(240..) days: 100.0 % 17 17 17
Branch coverage date bins:
(240..) days: 70.0 % 60 42 18 42

 Age         Owner                    Branch data    TLA  Line data    Source code
                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * nbtree.h
                                  4                 :                :  *    header file for postgres btree access method implementation.
                                  5                 :                :  *
                                  6                 :                :  *
                                  7                 :                :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
                                  8                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                  9                 :                :  *
                                 10                 :                :  * src/include/access/nbtree.h
                                 11                 :                :  *
                                 12                 :                :  *-------------------------------------------------------------------------
                                 13                 :                :  */
                                 14                 :                : #ifndef NBTREE_H
                                 15                 :                : #define NBTREE_H
                                 16                 :                : 
                                 17                 :                : #include "access/amapi.h"
                                 18                 :                : #include "access/itup.h"
                                 19                 :                : #include "access/sdir.h"
                                 20                 :                : #include "access/tableam.h"
                                 21                 :                : #include "access/xlogreader.h"
                                 22                 :                : #include "catalog/pg_am_d.h"
                                 23                 :                : #include "catalog/pg_index.h"
                                 24                 :                : #include "lib/stringinfo.h"
                                 25                 :                : #include "storage/bufmgr.h"
                                 26                 :                : #include "storage/shm_toc.h"
                                 27                 :                : 
                                 28                 :                : /* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */
                                 29                 :                : typedef uint16 BTCycleId;
                                 30                 :                : 
                                 31                 :                : /*
                                 32                 :                :  *  BTPageOpaqueData -- At the end of every page, we store a pointer
                                 33                 :                :  *  to both siblings in the tree.  This is used to do forward/backward
                                 34                 :                :  *  index scans.  The next-page link is also critical for recovery when
                                 35                 :                :  *  a search has navigated to the wrong page due to concurrent page splits
                                 36                 :                :  *  or deletions; see src/backend/access/nbtree/README for more info.
                                 37                 :                :  *
                                 38                 :                :  *  In addition, we store the page's btree level (counting upwards from
                                 39                 :                :  *  zero at a leaf page) as well as some flag bits indicating the page type
                                 40                 :                :  *  and status.  If the page is deleted, a BTDeletedPageData struct is stored
                                 41                 :                :  *  in the page's tuple area, while a standard BTPageOpaqueData struct is
                                 42                 :                :  *  stored in the page special area.
                                 43                 :                :  *
                                 44                 :                :  *  We also store a "vacuum cycle ID".  When a page is split while VACUUM is
                                 45                 :                :  *  processing the index, a nonzero value associated with the VACUUM run is
                                 46                 :                :  *  stored into both halves of the split page.  (If VACUUM is not running,
                                 47                 :                :  *  both pages receive zero cycleids.)  This allows VACUUM to detect whether
                                 48                 :                :  *  a page was split since it started, with a small probability of false match
                                 49                 :                :  *  if the page was last split some exact multiple of MAX_BT_CYCLE_ID VACUUMs
                                 50                 :                :  *  ago.  Also, during a split, the BTP_SPLIT_END flag is cleared in the left
                                 51                 :                :  *  (original) page, and set in the right page, but only if the next page
                                 52                 :                :  *  to its right has a different cycleid.
                                 53                 :                :  *
                                 54                 :                :  *  NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested
                                 55                 :                :  *  instead.
                                 56                 :                :  *
                                 57                 :                :  *  NOTE: the btpo_level field used to be a union type in order to allow
                                 58                 :                :  *  deleted pages to store a 32-bit safexid in the same field.  We now store
                                 59                 :                :  *  64-bit/full safexid values using BTDeletedPageData instead.
                                 60                 :                :  */
                                 61                 :                : 
                                 62                 :                : typedef struct BTPageOpaqueData
                                 63                 :                : {
                                 64                 :                :     BlockNumber btpo_prev;      /* left sibling, or P_NONE if leftmost */
                                 65                 :                :     BlockNumber btpo_next;      /* right sibling, or P_NONE if rightmost */
                                 66                 :                :     uint32      btpo_level;     /* tree level --- zero for leaf pages */
                                 67                 :                :     uint16      btpo_flags;     /* flag bits, see below */
                                 68                 :                :     BTCycleId   btpo_cycleid;   /* vacuum cycle ID of latest split */
                                 69                 :                : } BTPageOpaqueData;
                                 70                 :                : 
                                 71                 :                : typedef BTPageOpaqueData *BTPageOpaque;
                                 72                 :                : 
                                 73                 :                : #define BTPageGetOpaque(page) ((BTPageOpaque) PageGetSpecialPointer(page))
                                 74                 :                : 
                                 75                 :                : /* Bits defined in btpo_flags */
                                 76                 :                : #define BTP_LEAF        (1 << 0)  /* leaf page, i.e. not internal page */
                                 77                 :                : #define BTP_ROOT        (1 << 1)  /* root page (has no parent) */
                                 78                 :                : #define BTP_DELETED     (1 << 2)  /* page has been deleted from tree */
                                 79                 :                : #define BTP_META        (1 << 3)  /* meta-page */
                                 80                 :                : #define BTP_HALF_DEAD   (1 << 4)  /* empty, but still in tree */
                                 81                 :                : #define BTP_SPLIT_END   (1 << 5)  /* rightmost page of split group */
                                 82                 :                : #define BTP_HAS_GARBAGE (1 << 6)  /* page has LP_DEAD tuples (deprecated) */
                                 83                 :                : #define BTP_INCOMPLETE_SPLIT (1 << 7) /* right sibling's downlink is missing */
                                 84                 :                : #define BTP_HAS_FULLXID (1 << 8)  /* contains BTDeletedPageData */
                                 85                 :                : 
                                 86                 :                : /*
                                 87                 :                :  * The max allowed value of a cycle ID is a bit less than 64K.  This is
                                 88                 :                :  * for convenience of pg_filedump and similar utilities: we want to use
                                 89                 :                :  * the last 2 bytes of special space as an index type indicator, and
                                 90                 :                :  * restricting cycle ID lets btree use that space for vacuum cycle IDs
                                 91                 :                :  * while still allowing index type to be identified.
                                 92                 :                :  */
                                 93                 :                : #define MAX_BT_CYCLE_ID     0xFF7F
                                 94                 :                : 
                                 95                 :                : 
                                 96                 :                : /*
                                 97                 :                :  * The Meta page is always the first page in the btree index.
                                 98                 :                :  * Its primary purpose is to point to the location of the btree root page.
                                 99                 :                :  * We also point to the "fast" root, which is the current effective root;
                                100                 :                :  * see README for discussion.
                                101                 :                :  */
                                102                 :                : 
                                103                 :                : typedef struct BTMetaPageData
                                104                 :                : {
                                105                 :                :     uint32      btm_magic;      /* should contain BTREE_MAGIC */
                                106                 :                :     uint32      btm_version;    /* nbtree version (always <= BTREE_VERSION) */
                                107                 :                :     BlockNumber btm_root;       /* current root location */
                                108                 :                :     uint32      btm_level;      /* tree level of the root page */
                                109                 :                :     BlockNumber btm_fastroot;   /* current "fast" root location */
                                110                 :                :     uint32      btm_fastlevel;  /* tree level of the "fast" root page */
                                111                 :                :     /* remaining fields only valid when btm_version >= BTREE_NOVAC_VERSION */
                                112                 :                : 
                                113                 :                :     /* number of deleted, non-recyclable pages during last cleanup */
                                114                 :                :     uint32      btm_last_cleanup_num_delpages;
                                115                 :                :     /* number of heap tuples during last cleanup (deprecated) */
                                116                 :                :     float8      btm_last_cleanup_num_heap_tuples;
                                117                 :                : 
                                118                 :                :     bool        btm_allequalimage;  /* are all columns "equalimage"? */
                                119                 :                : } BTMetaPageData;
                                120                 :                : 
                                121                 :                : #define BTPageGetMeta(p) \
                                122                 :                :     ((BTMetaPageData *) PageGetContents(p))
                                123                 :                : 
                                124                 :                : /*
                                125                 :                :  * The current Btree version is 4.  That's what you'll get when you create
                                126                 :                :  * a new index.
                                127                 :                :  *
                                128                 :                :  * Btree version 3 was used in PostgreSQL v11.  It is mostly the same as
                                129                 :                :  * version 4, but heap TIDs were not part of the keyspace.  Index tuples
                                130                 :                :  * with duplicate keys could be stored in any order.  We continue to
                                131                 :                :  * support reading and writing Btree versions 2 and 3, so that they don't
                                132                 :                :  * need to be immediately re-indexed at pg_upgrade.  In order to get the
                                133                 :                :  * new heapkeyspace semantics, however, a REINDEX is needed.
                                134                 :                :  *
                                135                 :                :  * Deduplication is safe to use when the btm_allequalimage field is set to
                                136                 :                :  * true.  It's safe to read the btm_allequalimage field on version 3, but
                                137                 :                :  * only version 4 indexes make use of deduplication.  Even version 4
                                138                 :                :  * indexes created on PostgreSQL v12 will need a REINDEX to make use of
                                139                 :                :  * deduplication, though, since there is no other way to set
                                140                 :                :  * btm_allequalimage to true (pg_upgrade hasn't been taught to set the
                                141                 :                :  * metapage field).
                                142                 :                :  *
                                143                 :                :  * Btree version 2 is mostly the same as version 3.  There are two new
                                144                 :                :  * fields in the metapage that were introduced in version 3.  A version 2
                                145                 :                :  * metapage will be automatically upgraded to version 3 on the first
                                146                 :                :  * insert to it.  INCLUDE indexes cannot use version 2.
                                147                 :                :  */
                                148                 :                : #define BTREE_METAPAGE  0       /* first page is meta */
                                149                 :                : #define BTREE_MAGIC     0x053162    /* magic number in metapage */
                                150                 :                : #define BTREE_VERSION   4       /* current version number */
                                151                 :                : #define BTREE_MIN_VERSION   2   /* minimum supported version */
                                152                 :                : #define BTREE_NOVAC_VERSION 3   /* version with all meta fields set */
                                153                 :                : 
                                154                 :                : /*
                                155                 :                :  * Maximum size of a btree index entry, including its tuple header.
                                156                 :                :  *
                                157                 :                :  * We actually need to be able to fit three items on every page,
                                158                 :                :  * so restrict any one item to 1/3 the per-page available space.
                                159                 :                :  *
                                160                 :                :  * There are rare cases where _bt_truncate() will need to enlarge
                                161                 :                :  * a heap index tuple to make space for a tiebreaker heap TID
                                162                 :                :  * attribute, which we account for here.
                                163                 :                :  */
                                164                 :                : #define BTMaxItemSize(page) \
                                165                 :                :     (MAXALIGN_DOWN((PageGetPageSize(page) - \
                                166                 :                :                     MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
                                167                 :                :                     MAXALIGN(sizeof(BTPageOpaqueData))) / 3) - \
                                168                 :                :                     MAXALIGN(sizeof(ItemPointerData)))
                                169                 :                : #define BTMaxItemSizeNoHeapTid(page) \
                                170                 :                :     MAXALIGN_DOWN((PageGetPageSize(page) - \
                                171                 :                :                    MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
                                172                 :                :                    MAXALIGN(sizeof(BTPageOpaqueData))) / 3)
                                173                 :                : 
                                174                 :                : /*
                                175                 :                :  * MaxTIDsPerBTreePage is an upper bound on the number of heap TIDs tuples
                                176                 :                :  * that may be stored on a btree leaf page.  It is used to size the
                                177                 :                :  * per-page temporary buffers.
                                178                 :                :  *
                                179                 :                :  * Note: we don't bother considering per-tuple overheads here to keep
                                180                 :                :  * things simple (value is based on how many elements a single array of
                                181                 :                :  * heap TIDs must have to fill the space between the page header and
                                182                 :                :  * special area).  The value is slightly higher (i.e. more conservative)
                                183                 :                :  * than necessary as a result, which is considered acceptable.
                                184                 :                :  */
                                185                 :                : #define MaxTIDsPerBTreePage \
                                186                 :                :     (int) ((BLCKSZ - SizeOfPageHeaderData - sizeof(BTPageOpaqueData)) / \
                                187                 :                :            sizeof(ItemPointerData))
                                188                 :                : 
                                189                 :                : /*
                                190                 :                :  * The leaf-page fillfactor defaults to 90% but is user-adjustable.
                                191                 :                :  * For pages above the leaf level, we use a fixed 70% fillfactor.
                                192                 :                :  * The fillfactor is applied during index build and when splitting
                                193                 :                :  * a rightmost page; when splitting non-rightmost pages we try to
                                194                 :                :  * divide the data equally.  When splitting a page that's entirely
                                195                 :                :  * filled with a single value (duplicates), the effective leaf-page
                                196                 :                :  * fillfactor is 96%, regardless of whether the page is a rightmost
                                197                 :                :  * page.
                                198                 :                :  */
                                199                 :                : #define BTREE_MIN_FILLFACTOR        10
                                200                 :                : #define BTREE_DEFAULT_FILLFACTOR    90
                                201                 :                : #define BTREE_NONLEAF_FILLFACTOR    70
                                202                 :                : #define BTREE_SINGLEVAL_FILLFACTOR  96
                                203                 :                : 
                                204                 :                : /*
                                205                 :                :  *  In general, the btree code tries to localize its knowledge about
                                206                 :                :  *  page layout to a couple of routines.  However, we need a special
                                207                 :                :  *  value to indicate "no page number" in those places where we expect
                                208                 :                :  *  page numbers.  We can use zero for this because we never need to
                                209                 :                :  *  make a pointer to the metadata page.
                                210                 :                :  */
                                211                 :                : 
                                212                 :                : #define P_NONE          0
                                213                 :                : 
                                214                 :                : /*
                                215                 :                :  * Macros to test whether a page is leftmost or rightmost on its tree level,
                                216                 :                :  * as well as other state info kept in the opaque data.
                                217                 :                :  */
                                218                 :                : #define P_LEFTMOST(opaque)      ((opaque)->btpo_prev == P_NONE)
                                219                 :                : #define P_RIGHTMOST(opaque)     ((opaque)->btpo_next == P_NONE)
                                220                 :                : #define P_ISLEAF(opaque)        (((opaque)->btpo_flags & BTP_LEAF) != 0)
                                221                 :                : #define P_ISROOT(opaque)        (((opaque)->btpo_flags & BTP_ROOT) != 0)
                                222                 :                : #define P_ISDELETED(opaque)     (((opaque)->btpo_flags & BTP_DELETED) != 0)
                                223                 :                : #define P_ISMETA(opaque)        (((opaque)->btpo_flags & BTP_META) != 0)
                                224                 :                : #define P_ISHALFDEAD(opaque)    (((opaque)->btpo_flags & BTP_HALF_DEAD) != 0)
                                225                 :                : #define P_IGNORE(opaque)        (((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) != 0)
                                226                 :                : #define P_HAS_GARBAGE(opaque)   (((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0)
                                227                 :                : #define P_INCOMPLETE_SPLIT(opaque)  (((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0)
                                228                 :                : #define P_HAS_FULLXID(opaque)   (((opaque)->btpo_flags & BTP_HAS_FULLXID) != 0)
                                229                 :                : 
                                230                 :                : /*
                                231                 :                :  * BTDeletedPageData is the page contents of a deleted page
                                232                 :                :  */
                                233                 :                : typedef struct BTDeletedPageData
                                234                 :                : {
                                235                 :                :     FullTransactionId safexid;  /* See BTPageIsRecyclable() */
                                236                 :                : } BTDeletedPageData;
                                237                 :                : 
                                238                 :                : static inline void
 1145 pg@bowt.ie                239                 :CBC        3680 : BTPageSetDeleted(Page page, FullTransactionId safexid)
                                240                 :                : {
                                241                 :                :     BTPageOpaque opaque;
                                242                 :                :     PageHeader  header;
                                243                 :                :     BTDeletedPageData *contents;
                                244                 :                : 
  744 michael@paquier.xyz       245                 :           3680 :     opaque = BTPageGetOpaque(page);
 1145 pg@bowt.ie                246                 :           3680 :     header = ((PageHeader) page);
                                247                 :                : 
                                248                 :           3680 :     opaque->btpo_flags &= ~BTP_HALF_DEAD;
                                249                 :           3680 :     opaque->btpo_flags |= BTP_DELETED | BTP_HAS_FULLXID;
                                250                 :           3680 :     header->pd_lower = MAXALIGN(SizeOfPageHeaderData) +
                                251                 :                :         sizeof(BTDeletedPageData);
                                252                 :           3680 :     header->pd_upper = header->pd_special;
                                253                 :                : 
                                254                 :                :     /* Set safexid in deleted page */
                                255                 :           3680 :     contents = ((BTDeletedPageData *) PageGetContents(page));
                                256                 :           3680 :     contents->safexid = safexid;
                                257                 :           3680 : }
                                258                 :                : 
                                259                 :                : static inline FullTransactionId
                                260                 :            702 : BTPageGetDeleteXid(Page page)
                                261                 :                : {
                                262                 :                :     BTPageOpaque opaque;
                                263                 :                :     BTDeletedPageData *contents;
                                264                 :                : 
                                265                 :                :     /* We only expect to be called with a deleted page */
                                266         [ -  + ]:            702 :     Assert(!PageIsNew(page));
  744 michael@paquier.xyz       267                 :            702 :     opaque = BTPageGetOpaque(page);
 1145 pg@bowt.ie                268         [ -  + ]:            702 :     Assert(P_ISDELETED(opaque));
                                269                 :                : 
                                270                 :                :     /* pg_upgrade'd deleted page -- must be safe to delete now */
                                271         [ -  + ]:            702 :     if (!P_HAS_FULLXID(opaque))
 1145 pg@bowt.ie                272                 :UBC           0 :         return FirstNormalFullTransactionId;
                                273                 :                : 
                                274                 :                :     /* Get safexid from deleted page */
 1145 pg@bowt.ie                275                 :CBC         702 :     contents = ((BTDeletedPageData *) PageGetContents(page));
                                276                 :            702 :     return contents->safexid;
                                277                 :                : }
                                278                 :                : 
                                279                 :                : /*
                                280                 :                :  * Is an existing page recyclable?
                                281                 :                :  *
                                282                 :                :  * This exists to centralize the policy on which deleted pages are now safe to
                                283                 :                :  * re-use.  However, _bt_pendingfsm_finalize() duplicates some of the same
                                284                 :                :  * logic because it doesn't work directly with pages -- keep the two in sync.
                                285                 :                :  *
                                286                 :                :  * Note: PageIsNew() pages are always safe to recycle, but we can't deal with
                                287                 :                :  * them here (caller is responsible for that case themselves).  Caller might
                                288                 :                :  * well need special handling for new pages anyway.
                                289                 :                :  */
                                290                 :                : static inline bool
  377                           291                 :          10946 : BTPageIsRecyclable(Page page, Relation heaprel)
                                292                 :                : {
                                293                 :                :     BTPageOpaque opaque;
                                294                 :                : 
 1145                           295         [ -  + ]:          10946 :     Assert(!PageIsNew(page));
  309                           296         [ -  + ]:          10946 :     Assert(heaprel != NULL);
                                297                 :                : 
                                298                 :                :     /* Recycling okay iff page is deleted and safexid is old enough */
  744 michael@paquier.xyz       299                 :          10946 :     opaque = BTPageGetOpaque(page);
 1145 pg@bowt.ie                300         [ +  + ]:          10946 :     if (P_ISDELETED(opaque))
                                301                 :                :     {
  309                           302                 :            498 :         FullTransactionId safexid = BTPageGetDeleteXid(page);
                                303                 :                : 
                                304                 :                :         /*
                                305                 :                :          * The page was deleted, but when? If it was just deleted, a scan
                                306                 :                :          * might have seen the downlink to it, and will read the page later.
                                307                 :                :          * As long as that can happen, we must keep the deleted page around as
                                308                 :                :          * a tombstone.
                                309                 :                :          *
                                310                 :                :          * For that check if the deletion XID could still be visible to
                                311                 :                :          * anyone. If not, then no scan that's still in progress could have
                                312                 :                :          * seen its downlink, and we can recycle it.
                                313                 :                :          */
                                314                 :            498 :         return GlobalVisCheckRemovableFullXid(heaprel, safexid);
                                315                 :                :     }
                                316                 :                : 
 1145                           317                 :          10448 :     return false;
                                318                 :                : }
                                319                 :                : 
                                320                 :                : /*
                                321                 :                :  * BTVacState and BTPendingFSM are private nbtree.c state used during VACUUM.
                                322                 :                :  * They are exported for use by page deletion related code in nbtpage.c.
                                323                 :                :  */
                                324                 :                : typedef struct BTPendingFSM
                                325                 :                : {
                                326                 :                :     BlockNumber target;         /* Page deleted by current VACUUM */
                                327                 :                :     FullTransactionId safexid;  /* Page's BTDeletedPageData.safexid */
                                328                 :                : } BTPendingFSM;
                                329                 :                : 
                                330                 :                : typedef struct BTVacState
                                331                 :                : {
                                332                 :                :     IndexVacuumInfo *info;
                                333                 :                :     IndexBulkDeleteResult *stats;
                                334                 :                :     IndexBulkDeleteCallback callback;
                                335                 :                :     void       *callback_state;
                                336                 :                :     BTCycleId   cycleid;
                                337                 :                :     MemoryContext pagedelcontext;
                                338                 :                : 
                                339                 :                :     /*
                                340                 :                :      * _bt_pendingfsm_finalize() state
                                341                 :                :      */
                                342                 :                :     int         bufsize;        /* pendingpages space (in # elements) */
                                343                 :                :     int         maxbufsize;     /* max bufsize that respects work_mem */
                                344                 :                :     BTPendingFSM *pendingpages; /* One entry per newly deleted page */
                                345                 :                :     int         npendingpages;  /* current # valid pendingpages */
                                346                 :                : } BTVacState;
                                347                 :                : 
                                348                 :                : /*
                                349                 :                :  *  Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
                                350                 :                :  *  page.  The high key is not a tuple that is used to visit the heap.  It is
                                351                 :                :  *  a pivot tuple (see "Notes on B-Tree tuple format" below for definition).
                                352                 :                :  *  The high key on a page is required to be greater than or equal to any
                                353                 :                :  *  other key that appears on the page.  If we find ourselves trying to
                                354                 :                :  *  insert a key that is strictly > high key, we know we need to move right
                                355                 :                :  *  (this should only happen if the page was split since we examined the
                                356                 :                :  *  parent page).
                                357                 :                :  *
                                358                 :                :  *  Our insertion algorithm guarantees that we can use the initial least key
                                359                 :                :  *  on our right sibling as the high key.  Once a page is created, its high
                                360                 :                :  *  key changes only if the page is split.
                                361                 :                :  *
                                362                 :                :  *  On a non-rightmost page, the high key lives in item 1 and data items
                                363                 :                :  *  start in item 2.  Rightmost pages have no high key, so we store data
                                364                 :                :  *  items beginning in item 1.
                                365                 :                :  */
                                366                 :                : 
                                367                 :                : #define P_HIKEY             ((OffsetNumber) 1)
                                368                 :                : #define P_FIRSTKEY          ((OffsetNumber) 2)
                                369                 :                : #define P_FIRSTDATAKEY(opaque)  (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
                                370                 :                : 
                                371                 :                : /*
                                372                 :                :  * Notes on B-Tree tuple format, and key and non-key attributes:
                                373                 :                :  *
                                374                 :                :  * INCLUDE B-Tree indexes have non-key attributes.  These are extra
                                375                 :                :  * attributes that may be returned by index-only scans, but do not influence
                                376                 :                :  * the order of items in the index (formally, non-key attributes are not
                                377                 :                :  * considered to be part of the key space).  Non-key attributes are only
                                378                 :                :  * present in leaf index tuples whose item pointers actually point to heap
                                379                 :                :  * tuples (non-pivot tuples).  _bt_check_natts() enforces the rules
                                380                 :                :  * described here.
                                381                 :                :  *
                                382                 :                :  * Non-pivot tuple format (plain/non-posting variant):
                                383                 :                :  *
                                384                 :                :  *  t_tid | t_info | key values | INCLUDE columns, if any
                                385                 :                :  *
                                386                 :                :  * t_tid points to the heap TID, which is a tiebreaker key column as of
                                387                 :                :  * BTREE_VERSION 4.
                                388                 :                :  *
                                389                 :                :  * Non-pivot tuples complement pivot tuples, which only have key columns.
                                390                 :                :  * The sole purpose of pivot tuples is to represent how the key space is
                                391                 :                :  * separated.  In general, any B-Tree index that has more than one level
                                392                 :                :  * (i.e. any index that does not just consist of a metapage and a single
                                393                 :                :  * leaf root page) must have some number of pivot tuples, since pivot
                                394                 :                :  * tuples are used for traversing the tree.  Suffix truncation can omit
                                395                 :                :  * trailing key columns when a new pivot is formed, which makes minus
                                396                 :                :  * infinity their logical value.  Since BTREE_VERSION 4 indexes treat heap
                                397                 :                :  * TID as a trailing key column that ensures that all index tuples are
                                398                 :                :  * physically unique, it is necessary to represent heap TID as a trailing
                                399                 :                :  * key column in pivot tuples, though very often this can be truncated
                                400                 :                :  * away, just like any other key column. (Actually, the heap TID is
                                401                 :                :  * omitted rather than truncated, since its representation is different to
                                402                 :                :  * the non-pivot representation.)
                                403                 :                :  *
                                404                 :                :  * Pivot tuple format:
                                405                 :                :  *
                                406                 :                :  *  t_tid | t_info | key values | [heap TID]
                                407                 :                :  *
                                408                 :                :  * We store the number of columns present inside pivot tuples by abusing
                                409                 :                :  * their t_tid offset field, since pivot tuples never need to store a real
                                410                 :                :  * offset (pivot tuples generally store a downlink in t_tid, though).  The
                                411                 :                :  * offset field only stores the number of columns/attributes when the
                                412                 :                :  * INDEX_ALT_TID_MASK bit is set, which doesn't count the trailing heap
                                413                 :                :  * TID column sometimes stored in pivot tuples -- that's represented by
                                414                 :                :  * the presence of BT_PIVOT_HEAP_TID_ATTR.  The INDEX_ALT_TID_MASK bit in
                                415                 :                :  * t_info is always set on BTREE_VERSION 4 pivot tuples, since
                                416                 :                :  * BTreeTupleIsPivot() must work reliably on heapkeyspace versions.
                                417                 :                :  *
                                418                 :                :  * In version 2 or version 3 (!heapkeyspace) indexes, INDEX_ALT_TID_MASK
                                419                 :                :  * might not be set in pivot tuples.  BTreeTupleIsPivot() won't work
                                420                 :                :  * reliably as a result.  The number of columns stored is implicitly the
                                421                 :                :  * same as the number of columns in the index, just like any non-pivot
                                422                 :                :  * tuple. (The number of columns stored should not vary, since suffix
                                423                 :                :  * truncation of key columns is unsafe within any !heapkeyspace index.)
                                424                 :                :  *
                                425                 :                :  * The 12 least significant bits from t_tid's offset number are used to
                                426                 :                :  * represent the number of key columns within a pivot tuple.  This leaves 4
                                427                 :                :  * status bits (BT_STATUS_OFFSET_MASK bits), which are shared by all tuples
                                428                 :                :  * that have the INDEX_ALT_TID_MASK bit set (set in t_info) to store basic
                                429                 :                :  * tuple metadata.  BTreeTupleIsPivot() and BTreeTupleIsPosting() use the
                                430                 :                :  * BT_STATUS_OFFSET_MASK bits.
                                431                 :                :  *
                                432                 :                :  * Sometimes non-pivot tuples also use a representation that repurposes
                                433                 :                :  * t_tid to store metadata rather than a TID.  PostgreSQL v13 introduced a
                                434                 :                :  * new non-pivot tuple format to support deduplication: posting list
                                435                 :                :  * tuples.  Deduplication merges together multiple equal non-pivot tuples
                                436                 :                :  * into a logically equivalent, space efficient representation.  A posting
                                437                 :                :  * list is an array of ItemPointerData elements.  Non-pivot tuples are
                                438                 :                :  * merged together to form posting list tuples lazily, at the point where
                                439                 :                :  * we'd otherwise have to split a leaf page.
                                440                 :                :  *
                                441                 :                :  * Posting tuple format (alternative non-pivot tuple representation):
                                442                 :                :  *
                                443                 :                :  *  t_tid | t_info | key values | posting list (TID array)
                                444                 :                :  *
                                445                 :                :  * Posting list tuples are recognized as such by having the
                                446                 :                :  * INDEX_ALT_TID_MASK status bit set in t_info and the BT_IS_POSTING status
                                447                 :                :  * bit set in t_tid's offset number.  These flags redefine the content of
                                448                 :                :  * the posting tuple's t_tid to store the location of the posting list
                                449                 :                :  * (instead of a block number), as well as the total number of heap TIDs
                                450                 :                :  * present in the tuple (instead of a real offset number).
                                451                 :                :  *
                                452                 :                :  * The 12 least significant bits from t_tid's offset number are used to
                                453                 :                :  * represent the number of heap TIDs present in the tuple, leaving 4 status
                                454                 :                :  * bits (the BT_STATUS_OFFSET_MASK bits).  Like any non-pivot tuple, the
                                455                 :                :  * number of columns stored is always implicitly the total number in the
                                456                 :                :  * index (in practice there can never be non-key columns stored, since
                                457                 :                :  * deduplication is not supported with INCLUDE indexes).
                                458                 :                :  */
                                459                 :                : #define INDEX_ALT_TID_MASK          INDEX_AM_RESERVED_BIT
                                460                 :                : 
                                461                 :                : /* Item pointer offset bit masks */
                                462                 :                : #define BT_OFFSET_MASK              0x0FFF
                                463                 :                : #define BT_STATUS_OFFSET_MASK       0xF000
                                464                 :                : /* BT_STATUS_OFFSET_MASK status bits */
                                465                 :                : #define BT_PIVOT_HEAP_TID_ATTR      0x1000
                                466                 :                : #define BT_IS_POSTING               0x2000
                                467                 :                : 
                                468                 :                : /*
                                469                 :                :  * Mask allocated for number of keys in index tuple must be able to fit
                                470                 :                :  * maximum possible number of index attributes
                                471                 :                :  */
                                472                 :                : StaticAssertDecl(BT_OFFSET_MASK >= INDEX_MAX_KEYS,
                                473                 :                :                  "BT_OFFSET_MASK can't fit INDEX_MAX_KEYS");
                                474                 :                : 
                                475                 :                : /*
                                476                 :                :  * Note: BTreeTupleIsPivot() can have false negatives (but not false
                                477                 :                :  * positives) when used with !heapkeyspace indexes
                                478                 :                :  */
                                479                 :                : static inline bool
 1509                           480                 :      517565445 : BTreeTupleIsPivot(IndexTuple itup)
                                481                 :                : {
                                482         [ +  + ]:      517565445 :     if ((itup->t_info & INDEX_ALT_TID_MASK) == 0)
                                483                 :      348505726 :         return false;
                                484                 :                :     /* absence of BT_IS_POSTING in offset number indicates pivot tuple */
                                485         [ +  + ]:      169059719 :     if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) != 0)
                                486                 :        7507314 :         return false;
                                487                 :                : 
                                488                 :      161552405 :     return true;
                                489                 :                : }
                                490                 :                : 
                                491                 :                : static inline bool
                                492                 :      401910226 : BTreeTupleIsPosting(IndexTuple itup)
                                493                 :                : {
                                494         [ +  + ]:      401910226 :     if ((itup->t_info & INDEX_ALT_TID_MASK) == 0)
                                495                 :      244502094 :         return false;
                                496                 :                :     /* presence of BT_IS_POSTING in offset number indicates posting tuple */
                                497         [ +  + ]:      157408132 :     if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) == 0)
                                498                 :      118360111 :         return false;
                                499                 :                : 
                                500                 :       39048021 :     return true;
                                501                 :                : }
                                502                 :                : 
                                503                 :                : static inline void
 1453                           504                 :         205540 : BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
                                505                 :                : {
                                506         [ -  + ]:         205540 :     Assert(nhtids > 1);
                                507         [ -  + ]:         205540 :     Assert((nhtids & BT_STATUS_OFFSET_MASK) == 0);
 1504                           508         [ -  + ]:         205540 :     Assert((size_t) postingoffset == MAXALIGN(postingoffset));
 1509                           509         [ -  + ]:         205540 :     Assert(postingoffset < INDEX_SIZE_MASK);
 1453                           510         [ -  + ]:         205540 :     Assert(!BTreeTupleIsPivot(itup));
                                511                 :                : 
 1509                           512                 :         205540 :     itup->t_info |= INDEX_ALT_TID_MASK;
                                513                 :         205540 :     ItemPointerSetOffsetNumber(&itup->t_tid, (nhtids | BT_IS_POSTING));
                                514                 :         205540 :     ItemPointerSetBlockNumber(&itup->t_tid, postingoffset);
                                515                 :         205540 : }
                                516                 :                : 
                                517                 :                : static inline uint16
                                518                 :       14078504 : BTreeTupleGetNPosting(IndexTuple posting)
                                519                 :                : {
                                520                 :                :     OffsetNumber existing;
                                521                 :                : 
                                522         [ -  + ]:       14078504 :     Assert(BTreeTupleIsPosting(posting));
                                523                 :                : 
                                524                 :       14078504 :     existing = ItemPointerGetOffsetNumberNoCheck(&posting->t_tid);
                                525                 :       14078504 :     return (existing & BT_OFFSET_MASK);
                                526                 :                : }
                                527                 :                : 
                                528                 :                : static inline uint32
                                529                 :       15781222 : BTreeTupleGetPostingOffset(IndexTuple posting)
                                530                 :                : {
                                531         [ -  + ]:       15781222 :     Assert(BTreeTupleIsPosting(posting));
                                532                 :                : 
                                533                 :       15781222 :     return ItemPointerGetBlockNumberNoCheck(&posting->t_tid);
                                534                 :                : }
                                535                 :                : 
                                536                 :                : static inline ItemPointer
                                537                 :       14584104 : BTreeTupleGetPosting(IndexTuple posting)
                                538                 :                : {
                                539                 :       29168208 :     return (ItemPointer) ((char *) posting +
                                540                 :       14584104 :                           BTreeTupleGetPostingOffset(posting));
                                541                 :                : }
                                542                 :                : 
                                543                 :                : static inline ItemPointer
                                544                 :       12766648 : BTreeTupleGetPostingN(IndexTuple posting, int n)
                                545                 :                : {
                                546                 :       12766648 :     return BTreeTupleGetPosting(posting) + n;
                                547                 :                : }
                                548                 :                : 
                                549                 :                : /*
                                550                 :                :  * Get/set downlink block number in pivot tuple.
                                551                 :                :  *
                                552                 :                :  * Note: Cannot assert that tuple is a pivot tuple.  If we did so then
                                553                 :                :  * !heapkeyspace indexes would exhibit false positive assertion failures.
                                554                 :                :  */
                                555                 :                : static inline BlockNumber
                                556                 :        8054155 : BTreeTupleGetDownLink(IndexTuple pivot)
                                557                 :                : {
                                558                 :        8054155 :     return ItemPointerGetBlockNumberNoCheck(&pivot->t_tid);
                                559                 :                : }
                                560                 :                : 
                                561                 :                : static inline void
                                562                 :          34441 : BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
                                563                 :                : {
                                564                 :          34441 :     ItemPointerSetBlockNumber(&pivot->t_tid, blkno);
                                565                 :          34441 : }
                                566                 :                : 
                                567                 :                : /*
                                568                 :                :  * Get number of attributes within tuple.
                                569                 :                :  *
                                570                 :                :  * Note that this does not include an implicit tiebreaker heap TID
                                571                 :                :  * attribute, if any.  Note also that the number of key attributes must be
                                572                 :                :  * explicitly represented in all heapkeyspace pivot tuples.
                                573                 :                :  *
                                574                 :                :  * Note: This is defined as a macro rather than an inline function to
                                575                 :                :  * avoid including rel.h.
                                576                 :                :  */
                                577                 :                : #define BTreeTupleGetNAtts(itup, rel)   \
                                578                 :                :     ( \
                                579                 :                :         (BTreeTupleIsPivot(itup)) ? \
                                580                 :                :         ( \
                                581                 :                :             ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_OFFSET_MASK \
                                582                 :                :         ) \
                                583                 :                :         : \
                                584                 :                :         IndexRelationGetNumberOfAttributes(rel) \
                                585                 :                :     )
                                586                 :                : 
                                587                 :                : /*
                                588                 :                :  * Set number of key attributes in tuple.
                                589                 :                :  *
                                590                 :                :  * The heap TID tiebreaker attribute bit may also be set here, indicating that
                                591                 :                :  * a heap TID value will be stored at the end of the tuple (i.e. using the
                                592                 :                :  * special pivot tuple representation).
                                593                 :                :  */
                                594                 :                : static inline void
 1468                           595                 :          40111 : BTreeTupleSetNAtts(IndexTuple itup, uint16 nkeyatts, bool heaptid)
                                596                 :                : {
                                597         [ -  + ]:          40111 :     Assert(nkeyatts <= INDEX_MAX_KEYS);
 1453                           598         [ -  + ]:          40111 :     Assert((nkeyatts & BT_STATUS_OFFSET_MASK) == 0);
 1468                           599   [ +  +  -  + ]:          40111 :     Assert(!heaptid || nkeyatts > 0);
                                600   [ +  +  -  + ]:          40111 :     Assert(!BTreeTupleIsPivot(itup) || nkeyatts == 0);
                                601                 :                : 
 1509                           602                 :          40111 :     itup->t_info |= INDEX_ALT_TID_MASK;
                                603                 :                : 
 1468                           604         [ +  + ]:          40111 :     if (heaptid)
                                605                 :            526 :         nkeyatts |= BT_PIVOT_HEAP_TID_ATTR;
                                606                 :                : 
                                607                 :                :     /* BT_IS_POSTING bit is deliberately unset here */
                                608                 :          40111 :     ItemPointerSetOffsetNumber(&itup->t_tid, nkeyatts);
                                609         [ -  + ]:          40111 :     Assert(BTreeTupleIsPivot(itup));
 1509                           610                 :          40111 : }
                                611                 :                : 
                                612                 :                : /*
                                613                 :                :  * Get/set leaf page's "top parent" link from its high key.  Used during page
                                614                 :                :  * deletion.
                                615                 :                :  *
                                616                 :                :  * Note: Cannot assert that tuple is a pivot tuple.  If we did so then
                                617                 :                :  * !heapkeyspace indexes would exhibit false positive assertion failures.
                                618                 :                :  */
                                619                 :                : static inline BlockNumber
                                620                 :           2990 : BTreeTupleGetTopParent(IndexTuple leafhikey)
                                621                 :                : {
                                622                 :           2990 :     return ItemPointerGetBlockNumberNoCheck(&leafhikey->t_tid);
                                623                 :                : }
                                624                 :                : 
                                625                 :                : static inline void
                                626                 :           3681 : BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
                                627                 :                : {
                                628                 :           3681 :     ItemPointerSetBlockNumber(&leafhikey->t_tid, blkno);
 1468                           629                 :           3681 :     BTreeTupleSetNAtts(leafhikey, 0, false);
 1509                           630                 :           3681 : }
                                631                 :                : 
                                632                 :                : /*
                                633                 :                :  * Get tiebreaker heap TID attribute, if any.
                                634                 :                :  *
                                635                 :                :  * This returns the first/lowest heap TID in the case of a posting list tuple.
                                636                 :                :  */
                                637                 :                : static inline ItemPointer
                                638                 :       54231129 : BTreeTupleGetHeapTID(IndexTuple itup)
                                639                 :                : {
                                640         [ +  + ]:       54231129 :     if (BTreeTupleIsPivot(itup))
                                641                 :                :     {
                                642                 :                :         /* Pivot tuple heap TID representation? */
                                643         [ +  + ]:       38096741 :         if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) &
                                644                 :                :              BT_PIVOT_HEAP_TID_ATTR) != 0)
                                645                 :         311468 :             return (ItemPointer) ((char *) itup + IndexTupleSize(itup) -
                                646                 :                :                                   sizeof(ItemPointerData));
                                647                 :                : 
                                648                 :                :         /* Heap TID attribute was truncated */
                                649                 :       37785273 :         return NULL;
                                650                 :                :     }
                                651         [ +  + ]:       16134388 :     else if (BTreeTupleIsPosting(itup))
                                652                 :         637633 :         return BTreeTupleGetPosting(itup);
                                653                 :                : 
                                654                 :       15496755 :     return &itup->t_tid;
                                655                 :                : }
                                656                 :                : 
                                657                 :                : /*
                                658                 :                :  * Get maximum heap TID attribute, which could be the only TID in the case of
                                659                 :                :  * a non-pivot tuple that does not have a posting list tuple.
                                660                 :                :  *
                                661                 :                :  * Works with non-pivot tuples only.
                                662                 :                :  */
                                663                 :                : static inline ItemPointer
                                664                 :         141676 : BTreeTupleGetMaxHeapTID(IndexTuple itup)
                                665                 :                : {
                                666         [ -  + ]:         141676 :     Assert(!BTreeTupleIsPivot(itup));
                                667                 :                : 
                                668         [ +  + ]:         141676 :     if (BTreeTupleIsPosting(itup))
                                669                 :                :     {
                                670                 :         141168 :         uint16      nposting = BTreeTupleGetNPosting(itup);
                                671                 :                : 
                                672                 :         141168 :         return BTreeTupleGetPostingN(itup, nposting - 1);
                                673                 :                :     }
                                674                 :                : 
                                675                 :            508 :     return &itup->t_tid;
                                676                 :                : }
                                677                 :                : 
                                678                 :                : /*
                                679                 :                :  *  Operator strategy numbers for B-tree have been moved to access/stratnum.h,
                                680                 :                :  *  because many places need to use them in ScanKeyInit() calls.
                                681                 :                :  *
                                682                 :                :  *  The strategy numbers are chosen so that we can commute them by
                                683                 :                :  *  subtraction, thus:
                                684                 :                :  */
                                685                 :                : #define BTCommuteStrategyNumber(strat)  (BTMaxStrategyNumber + 1 - (strat))
                                686                 :                : 
                                687                 :                : /*
                                688                 :                :  *  When a new operator class is declared, we require that the user
                                689                 :                :  *  supply us with an amproc procedure (BTORDER_PROC) for determining
                                690                 :                :  *  whether, for two keys a and b, a < b, a = b, or a > b.  This routine
                                691                 :                :  *  must return < 0, 0, > 0, respectively, in these three cases.
                                692                 :                :  *
                                693                 :                :  *  To facilitate accelerated sorting, an operator class may choose to
                                694                 :                :  *  offer a second procedure (BTSORTSUPPORT_PROC).  For full details, see
                                695                 :                :  *  src/include/utils/sortsupport.h.
                                696                 :                :  *
                                697                 :                :  *  To support window frames defined by "RANGE offset PRECEDING/FOLLOWING",
                                698                 :                :  *  an operator class may choose to offer a third amproc procedure
                                699                 :                :  *  (BTINRANGE_PROC), independently of whether it offers sortsupport.
                                700                 :                :  *  For full details, see doc/src/sgml/btree.sgml.
                                701                 :                :  *
                                702                 :                :  *  To facilitate B-Tree deduplication, an operator class may choose to
                                703                 :                :  *  offer a forth amproc procedure (BTEQUALIMAGE_PROC).  For full details,
                                704                 :                :  *  see doc/src/sgml/btree.sgml.
                                705                 :                :  */
                                706                 :                : 
                                707                 :                : #define BTORDER_PROC        1
                                708                 :                : #define BTSORTSUPPORT_PROC  2
                                709                 :                : #define BTINRANGE_PROC      3
                                710                 :                : #define BTEQUALIMAGE_PROC   4
                                711                 :                : #define BTOPTIONS_PROC      5
                                712                 :                : #define BTNProcs            5
                                713                 :                : 
                                714                 :                : /*
                                715                 :                :  *  We need to be able to tell the difference between read and write
                                716                 :                :  *  requests for pages, in order to do locking correctly.
                                717                 :                :  */
                                718                 :                : 
                                719                 :                : #define BT_READ         BUFFER_LOCK_SHARE
                                720                 :                : #define BT_WRITE        BUFFER_LOCK_EXCLUSIVE
                                721                 :                : 
                                722                 :                : /*
                                723                 :                :  * BTStackData -- As we descend a tree, we push the location of pivot
                                724                 :                :  * tuples whose downlink we are about to follow onto a private stack.  If
                                725                 :                :  * we split a leaf, we use this stack to walk back up the tree and insert
                                726                 :                :  * data into its parent page at the correct location.  We also have to
                                727                 :                :  * recursively insert into the grandparent page if and when the parent page
                                728                 :                :  * splits.  Our private stack can become stale due to concurrent page
                                729                 :                :  * splits and page deletions, but it should never give us an irredeemably
                                730                 :                :  * bad picture.
                                731                 :                :  */
                                732                 :                : typedef struct BTStackData
                                733                 :                : {
                                734                 :                :     BlockNumber bts_blkno;
                                735                 :                :     OffsetNumber bts_offset;
                                736                 :                :     struct BTStackData *bts_parent;
                                737                 :                : } BTStackData;
                                738                 :                : 
                                739                 :                : typedef BTStackData *BTStack;
                                740                 :                : 
                                741                 :                : /*
                                742                 :                :  * BTScanInsertData is the btree-private state needed to find an initial
                                743                 :                :  * position for an indexscan, or to insert new tuples -- an "insertion
                                744                 :                :  * scankey" (not to be confused with a search scankey).  It's used to descend
                                745                 :                :  * a B-Tree using _bt_search.
                                746                 :                :  *
                                747                 :                :  * heapkeyspace indicates if we expect all keys in the index to be physically
                                748                 :                :  * unique because heap TID is used as a tiebreaker attribute, and if index may
                                749                 :                :  * have truncated key attributes in pivot tuples.  This is actually a property
                                750                 :                :  * of the index relation itself (not an indexscan).  heapkeyspace indexes are
                                751                 :                :  * indexes whose version is >= version 4.  It's convenient to keep this close
                                752                 :                :  * by, rather than accessing the metapage repeatedly.
                                753                 :                :  *
                                754                 :                :  * allequalimage is set to indicate that deduplication is safe for the index.
                                755                 :                :  * This is also a property of the index relation rather than an indexscan.
                                756                 :                :  *
                                757                 :                :  * anynullkeys indicates if any of the keys had NULL value when scankey was
                                758                 :                :  * built from index tuple (note that already-truncated tuple key attributes
                                759                 :                :  * set NULL as a placeholder key value, which also affects value of
                                760                 :                :  * anynullkeys).  This is a convenience for unique index non-pivot tuple
                                761                 :                :  * insertion, which usually temporarily unsets scantid, but shouldn't iff
                                762                 :                :  * anynullkeys is true.  Value generally matches non-pivot tuple's HasNulls
                                763                 :                :  * bit, but may not when inserting into an INCLUDE index (tuple header value
                                764                 :                :  * is affected by the NULL-ness of both key and non-key attributes).
                                765                 :                :  *
                                766                 :                :  * See comments in _bt_first for an explanation of the nextkey and backward
                                767                 :                :  * fields.
                                768                 :                :  *
                                769                 :                :  * scantid is the heap TID that is used as a final tiebreaker attribute.  It
                                770                 :                :  * is set to NULL when index scan doesn't need to find a position for a
                                771                 :                :  * specific physical tuple.  Must be set when inserting new tuples into
                                772                 :                :  * heapkeyspace indexes, since every tuple in the tree unambiguously belongs
                                773                 :                :  * in one exact position (it's never set with !heapkeyspace indexes, though).
                                774                 :                :  * Despite the representational difference, nbtree search code considers
                                775                 :                :  * scantid to be just another insertion scankey attribute.
                                776                 :                :  *
                                777                 :                :  * scankeys is an array of scan key entries for attributes that are compared
                                778                 :                :  * before scantid (user-visible attributes).  keysz is the size of the array.
                                779                 :                :  * During insertion, there must be a scan key for every attribute, but when
                                780                 :                :  * starting a regular index scan some can be omitted.  The array is used as a
                                781                 :                :  * flexible array member, though it's sized in a way that makes it possible to
                                782                 :                :  * use stack allocations.  See nbtree/README for full details.
                                783                 :                :  */
                                784                 :                : typedef struct BTScanInsertData
                                785                 :                : {
                                786                 :                :     bool        heapkeyspace;
                                787                 :                :     bool        allequalimage;
                                788                 :                :     bool        anynullkeys;
                                789                 :                :     bool        nextkey;
                                790                 :                :     bool        backward;       /* backward index scan? */
                                791                 :                :     ItemPointer scantid;        /* tiebreaker for scankeys */
                                792                 :                :     int         keysz;          /* Size of scankeys array */
                                793                 :                :     ScanKeyData scankeys[INDEX_MAX_KEYS];   /* Must appear last */
                                794                 :                : } BTScanInsertData;
                                795                 :                : 
                                796                 :                : typedef BTScanInsertData *BTScanInsert;
                                797                 :                : 
                                798                 :                : /*
                                799                 :                :  * BTInsertStateData is a working area used during insertion.
                                800                 :                :  *
                                801                 :                :  * This is filled in after descending the tree to the first leaf page the new
                                802                 :                :  * tuple might belong on.  Tracks the current position while performing
                                803                 :                :  * uniqueness check, before we have determined which exact page to insert
                                804                 :                :  * to.
                                805                 :                :  *
                                806                 :                :  * (This should be private to nbtinsert.c, but it's also used by
                                807                 :                :  * _bt_binsrch_insert)
                                808                 :                :  */
                                809                 :                : typedef struct BTInsertStateData
                                810                 :                : {
                                811                 :                :     IndexTuple  itup;           /* Item we're inserting */
                                812                 :                :     Size        itemsz;         /* Size of itup -- should be MAXALIGN()'d */
                                813                 :                :     BTScanInsert itup_key;      /* Insertion scankey */
                                814                 :                : 
                                815                 :                :     /* Buffer containing leaf page we're likely to insert itup on */
                                816                 :                :     Buffer      buf;
                                817                 :                : 
                                818                 :                :     /*
                                819                 :                :      * Cache of bounds within the current buffer.  Only used for insertions
                                820                 :                :      * where _bt_check_unique is called.  See _bt_binsrch_insert and
                                821                 :                :      * _bt_findinsertloc for details.
                                822                 :                :      */
                                823                 :                :     bool        bounds_valid;
                                824                 :                :     OffsetNumber low;
                                825                 :                :     OffsetNumber stricthigh;
                                826                 :                : 
                                827                 :                :     /*
                                828                 :                :      * if _bt_binsrch_insert found the location inside existing posting list,
                                829                 :                :      * save the position inside the list.  -1 sentinel value indicates overlap
                                830                 :                :      * with an existing posting list tuple that has its LP_DEAD bit set.
                                831                 :                :      */
                                832                 :                :     int         postingoff;
                                833                 :                : } BTInsertStateData;
                                834                 :                : 
                                835                 :                : typedef BTInsertStateData *BTInsertState;
                                836                 :                : 
                                837                 :                : /*
                                838                 :                :  * State used to representing an individual pending tuple during
                                839                 :                :  * deduplication.
                                840                 :                :  */
                                841                 :                : typedef struct BTDedupInterval
                                842                 :                : {
                                843                 :                :     OffsetNumber baseoff;
                                844                 :                :     uint16      nitems;
                                845                 :                : } BTDedupInterval;
                                846                 :                : 
                                847                 :                : /*
                                848                 :                :  * BTDedupStateData is a working area used during deduplication.
                                849                 :                :  *
                                850                 :                :  * The status info fields track the state of a whole-page deduplication pass.
                                851                 :                :  * State about the current pending posting list is also tracked.
                                852                 :                :  *
                                853                 :                :  * A pending posting list is comprised of a contiguous group of equal items
                                854                 :                :  * from the page, starting from page offset number 'baseoff'.  This is the
                                855                 :                :  * offset number of the "base" tuple for new posting list.  'nitems' is the
                                856                 :                :  * current total number of existing items from the page that will be merged to
                                857                 :                :  * make a new posting list tuple, including the base tuple item.  (Existing
                                858                 :                :  * items may themselves be posting list tuples, or regular non-pivot tuples.)
                                859                 :                :  *
                                860                 :                :  * The total size of the existing tuples to be freed when pending posting list
                                861                 :                :  * is processed gets tracked by 'phystupsize'.  This information allows
                                862                 :                :  * deduplication to calculate the space saving for each new posting list
                                863                 :                :  * tuple, and for the entire pass over the page as a whole.
                                864                 :                :  */
                                865                 :                : typedef struct BTDedupStateData
                                866                 :                : {
                                867                 :                :     /* Deduplication status info for entire pass over page */
                                868                 :                :     bool        deduplicate;    /* Still deduplicating page? */
                                869                 :                :     int         nmaxitems;      /* Number of max-sized tuples so far */
                                870                 :                :     Size        maxpostingsize; /* Limit on size of final tuple */
                                871                 :                : 
                                872                 :                :     /* Metadata about base tuple of current pending posting list */
                                873                 :                :     IndexTuple  base;           /* Use to form new posting list */
                                874                 :                :     OffsetNumber baseoff;       /* page offset of base */
                                875                 :                :     Size        basetupsize;    /* base size without original posting list */
                                876                 :                : 
                                877                 :                :     /* Other metadata about pending posting list */
                                878                 :                :     ItemPointer htids;          /* Heap TIDs in pending posting list */
                                879                 :                :     int         nhtids;         /* Number of heap TIDs in htids array */
                                880                 :                :     int         nitems;         /* Number of existing tuples/line pointers */
                                881                 :                :     Size        phystupsize;    /* Includes line pointer overhead */
                                882                 :                : 
                                883                 :                :     /*
                                884                 :                :      * Array of tuples to go on new version of the page.  Contains one entry
                                885                 :                :      * for each group of consecutive items.  Note that existing tuples that
                                886                 :                :      * will not become posting list tuples do not appear in the array (they
                                887                 :                :      * are implicitly unchanged by deduplication pass).
                                888                 :                :      */
                                889                 :                :     int         nintervals;     /* current number of intervals in array */
                                890                 :                :     BTDedupInterval intervals[MaxIndexTuplesPerPage];
                                891                 :                : } BTDedupStateData;
                                892                 :                : 
                                893                 :                : typedef BTDedupStateData *BTDedupState;
                                894                 :                : 
                                895                 :                : /*
                                896                 :                :  * BTVacuumPostingData is state that represents how to VACUUM (or delete) a
                                897                 :                :  * posting list tuple when some (though not all) of its TIDs are to be
                                898                 :                :  * deleted.
                                899                 :                :  *
                                900                 :                :  * Convention is that itup field is the original posting list tuple on input,
                                901                 :                :  * and palloc()'d final tuple used to overwrite existing tuple on output.
                                902                 :                :  */
                                903                 :                : typedef struct BTVacuumPostingData
                                904                 :                : {
                                905                 :                :     /* Tuple that will be/was updated */
                                906                 :                :     IndexTuple  itup;
                                907                 :                :     OffsetNumber updatedoffset;
                                908                 :                : 
                                909                 :                :     /* State needed to describe final itup in WAL */
                                910                 :                :     uint16      ndeletedtids;
                                911                 :                :     uint16      deletetids[FLEXIBLE_ARRAY_MEMBER];
                                912                 :                : } BTVacuumPostingData;
                                913                 :                : 
                                914                 :                : typedef BTVacuumPostingData *BTVacuumPosting;
                                915                 :                : 
                                916                 :                : /*
                                917                 :                :  * BTScanOpaqueData is the btree-private state needed for an indexscan.
                                918                 :                :  * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
                                919                 :                :  * details of the preprocessing), information about the current location
                                920                 :                :  * of the scan, and information about the marked location, if any.  (We use
                                921                 :                :  * BTScanPosData to represent the data needed for each of current and marked
                                922                 :                :  * locations.)  In addition we can remember some known-killed index entries
                                923                 :                :  * that must be marked before we can move off the current page.
                                924                 :                :  *
                                925                 :                :  * Index scans work a page at a time: we pin and read-lock the page, identify
                                926                 :                :  * all the matching items on the page and save them in BTScanPosData, then
                                927                 :                :  * release the read-lock while returning the items to the caller for
                                928                 :                :  * processing.  This approach minimizes lock/unlock traffic.  Note that we
                                929                 :                :  * keep the pin on the index page until the caller is done with all the items
                                930                 :                :  * (this is needed for VACUUM synchronization, see nbtree/README).  When we
                                931                 :                :  * are ready to step to the next page, if the caller has told us any of the
                                932                 :                :  * items were killed, we re-lock the page to mark them killed, then unlock.
                                933                 :                :  * Finally we drop the pin and step to the next page in the appropriate
                                934                 :                :  * direction.
                                935                 :                :  *
                                936                 :                :  * If we are doing an index-only scan, we save the entire IndexTuple for each
                                937                 :                :  * matched item, otherwise only its heap TID and offset.  The IndexTuples go
                                938                 :                :  * into a separate workspace array; each BTScanPosItem stores its tuple's
                                939                 :                :  * offset within that array.  Posting list tuples store a "base" tuple once,
                                940                 :                :  * allowing the same key to be returned for each TID in the posting list
                                941                 :                :  * tuple.
                                942                 :                :  */
                                943                 :                : 
                                944                 :                : typedef struct BTScanPosItem    /* what we remember about each match */
                                945                 :                : {
                                946                 :                :     ItemPointerData heapTid;    /* TID of referenced heap item */
                                947                 :                :     OffsetNumber indexOffset;   /* index item's location within page */
                                948                 :                :     LocationIndex tupleOffset;  /* IndexTuple's offset in workspace, if any */
                                949                 :                : } BTScanPosItem;
                                950                 :                : 
                                951                 :                : typedef struct BTScanPosData
                                952                 :                : {
                                953                 :                :     Buffer      buf;            /* if valid, the buffer is pinned */
                                954                 :                : 
                                955                 :                :     XLogRecPtr  lsn;            /* pos in the WAL stream when page was read */
                                956                 :                :     BlockNumber currPage;       /* page referenced by items array */
                                957                 :                :     BlockNumber nextPage;       /* page's right link when we scanned it */
                                958                 :                : 
                                959                 :                :     /*
                                960                 :                :      * moreLeft and moreRight track whether we think there may be matching
                                961                 :                :      * index entries to the left and right of the current page, respectively.
                                962                 :                :      * We can clear the appropriate one of these flags when _bt_checkkeys()
                                963                 :                :      * sets BTReadPageState.continuescan = false.
                                964                 :                :      */
                                965                 :                :     bool        moreLeft;
                                966                 :                :     bool        moreRight;
                                967                 :                : 
                                968                 :                :     /*
                                969                 :                :      * Direction of the scan at the time that _bt_readpage was called.
                                970                 :                :      *
                                971                 :                :      * Used by btrestrpos to "restore" the scan's array keys by resetting each
                                972                 :                :      * array to its first element's value (first in this scan direction). This
                                973                 :                :      * avoids the need to directly track the array keys in btmarkpos.
                                974                 :                :      */
                                975                 :                :     ScanDirection dir;
                                976                 :                : 
                                977                 :                :     /*
                                978                 :                :      * If we are doing an index-only scan, nextTupleOffset is the first free
                                979                 :                :      * location in the associated tuple storage workspace.
                                980                 :                :      */
                                981                 :                :     int         nextTupleOffset;
                                982                 :                : 
                                983                 :                :     /*
                                984                 :                :      * The items array is always ordered in index order (ie, increasing
                                985                 :                :      * indexoffset).  When scanning backwards it is convenient to fill the
                                986                 :                :      * array back-to-front, so we start at the last slot and fill downwards.
                                987                 :                :      * Hence we need both a first-valid-entry and a last-valid-entry counter.
                                988                 :                :      * itemIndex is a cursor showing which entry was last returned to caller.
                                989                 :                :      */
                                990                 :                :     int         firstItem;      /* first valid index in items[] */
                                991                 :                :     int         lastItem;       /* last valid index in items[] */
                                992                 :                :     int         itemIndex;      /* current index in items[] */
                                993                 :                : 
                                994                 :                :     BTScanPosItem items[MaxTIDsPerBTreePage];   /* MUST BE LAST */
                                995                 :                : } BTScanPosData;
                                996                 :                : 
                                997                 :                : typedef BTScanPosData *BTScanPos;
                                998                 :                : 
                                999                 :                : #define BTScanPosIsPinned(scanpos) \
                               1000                 :                : ( \
                               1001                 :                :     AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
                               1002                 :                :                 !BufferIsValid((scanpos).buf)), \
                               1003                 :                :     BufferIsValid((scanpos).buf) \
                               1004                 :                : )
                               1005                 :                : #define BTScanPosUnpin(scanpos) \
                               1006                 :                :     do { \
                               1007                 :                :         ReleaseBuffer((scanpos).buf); \
                               1008                 :                :         (scanpos).buf = InvalidBuffer; \
                               1009                 :                :     } while (0)
                               1010                 :                : #define BTScanPosUnpinIfPinned(scanpos) \
                               1011                 :                :     do { \
                               1012                 :                :         if (BTScanPosIsPinned(scanpos)) \
                               1013                 :                :             BTScanPosUnpin(scanpos); \
                               1014                 :                :     } while (0)
                               1015                 :                : 
                               1016                 :                : #define BTScanPosIsValid(scanpos) \
                               1017                 :                : ( \
                               1018                 :                :     AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
                               1019                 :                :                 !BufferIsValid((scanpos).buf)), \
                               1020                 :                :     BlockNumberIsValid((scanpos).currPage) \
                               1021                 :                : )
                               1022                 :                : #define BTScanPosInvalidate(scanpos) \
                               1023                 :                :     do { \
                               1024                 :                :         (scanpos).currPage = InvalidBlockNumber; \
                               1025                 :                :         (scanpos).nextPage = InvalidBlockNumber; \
                               1026                 :                :         (scanpos).buf = InvalidBuffer; \
                               1027                 :                :         (scanpos).lsn = InvalidXLogRecPtr; \
                               1028                 :                :         (scanpos).nextTupleOffset = 0; \
                               1029                 :                :     } while (0)
                               1030                 :                : 
                               1031                 :                : /* We need one of these for each equality-type SK_SEARCHARRAY scan key */
                               1032                 :                : typedef struct BTArrayKeyInfo
                               1033                 :                : {
                               1034                 :                :     int         scan_key;       /* index of associated key in keyData */
                               1035                 :                :     int         cur_elem;       /* index of current element in elem_values */
                               1036                 :                :     int         num_elems;      /* number of elems in current array value */
                               1037                 :                :     Datum      *elem_values;    /* array of num_elems Datums */
                               1038                 :                : } BTArrayKeyInfo;
                               1039                 :                : 
                               1040                 :                : typedef struct BTScanOpaqueData
                               1041                 :                : {
                               1042                 :                :     /* these fields are set by _bt_preprocess_keys(): */
                               1043                 :                :     bool        qual_ok;        /* false if qual can never be satisfied */
                               1044                 :                :     int         numberOfKeys;   /* number of preprocessed scan keys */
                               1045                 :                :     ScanKey     keyData;        /* array of preprocessed scan keys */
                               1046                 :                : 
                               1047                 :                :     /* workspace for SK_SEARCHARRAY support */
                               1048                 :                :     int         numArrayKeys;   /* number of equality-type array keys */
                               1049                 :                :     bool        needPrimScan;   /* New prim scan to continue in current dir? */
                               1050                 :                :     bool        scanBehind;     /* Last array advancement matched -inf attr? */
                               1051                 :                :     BTArrayKeyInfo *arrayKeys;  /* info about each equality-type array key */
                               1052                 :                :     FmgrInfo   *orderProcs;     /* ORDER procs for required equality keys */
                               1053                 :                :     MemoryContext arrayContext; /* scan-lifespan context for array data */
                               1054                 :                : 
                               1055                 :                :     /* info about killed items if any (killedItems is NULL if never used) */
                               1056                 :                :     int        *killedItems;    /* currPos.items indexes of killed items */
                               1057                 :                :     int         numKilled;      /* number of currently stored items */
                               1058                 :                : 
                               1059                 :                :     /*
                               1060                 :                :      * If we are doing an index-only scan, these are the tuple storage
                               1061                 :                :      * workspaces for the currPos and markPos respectively.  Each is of size
                               1062                 :                :      * BLCKSZ, so it can hold as much as a full page's worth of tuples.
                               1063                 :                :      */
                               1064                 :                :     char       *currTuples;     /* tuple storage for currPos */
                               1065                 :                :     char       *markTuples;     /* tuple storage for markPos */
                               1066                 :                : 
                               1067                 :                :     /*
                               1068                 :                :      * If the marked position is on the same page as current position, we
                               1069                 :                :      * don't use markPos, but just keep the marked itemIndex in markItemIndex
                               1070                 :                :      * (all the rest of currPos is valid for the mark position). Hence, to
                               1071                 :                :      * determine if there is a mark, first look at markItemIndex, then at
                               1072                 :                :      * markPos.
                               1073                 :                :      */
                               1074                 :                :     int         markItemIndex;  /* itemIndex, or -1 if not valid */
                               1075                 :                : 
                               1076                 :                :     /* keep these last in struct for efficiency */
                               1077                 :                :     BTScanPosData currPos;      /* current position data */
                               1078                 :                :     BTScanPosData markPos;      /* marked position, if any */
                               1079                 :                : } BTScanOpaqueData;
                               1080                 :                : 
                               1081                 :                : typedef BTScanOpaqueData *BTScanOpaque;
                               1082                 :                : 
                               1083                 :                : /*
                               1084                 :                :  * _bt_readpage state used across _bt_checkkeys calls for a page
                               1085                 :                :  */
                               1086                 :                : typedef struct BTReadPageState
                               1087                 :                : {
                               1088                 :                :     /* Input parameters, set by _bt_readpage for _bt_checkkeys */
                               1089                 :                :     ScanDirection dir;          /* current scan direction */
                               1090                 :                :     OffsetNumber minoff;        /* Lowest non-pivot tuple's offset */
                               1091                 :                :     OffsetNumber maxoff;        /* Highest non-pivot tuple's offset */
                               1092                 :                :     IndexTuple  finaltup;       /* Needed by scans with array keys */
                               1093                 :                :     BlockNumber prev_scan_page; /* previous _bt_parallel_release block */
                               1094                 :                :     Page        page;           /* Page being read */
                               1095                 :                : 
                               1096                 :                :     /* Per-tuple input parameters, set by _bt_readpage for _bt_checkkeys */
                               1097                 :                :     OffsetNumber offnum;        /* current tuple's page offset number */
                               1098                 :                : 
                               1099                 :                :     /* Output parameter, set by _bt_checkkeys for _bt_readpage */
                               1100                 :                :     OffsetNumber skip;          /* Array keys "look ahead" skip offnum */
                               1101                 :                :     bool        continuescan;   /* Terminate ongoing (primitive) index scan? */
                               1102                 :                : 
                               1103                 :                :     /*
                               1104                 :                :      * Input and output parameters, set and unset by both _bt_readpage and
                               1105                 :                :      * _bt_checkkeys to manage precheck optimizations
                               1106                 :                :      */
                               1107                 :                :     bool        prechecked;     /* precheck set continuescan to 'true'? */
                               1108                 :                :     bool        firstmatch;     /* at least one match so far?  */
                               1109                 :                : 
                               1110                 :                :     /*
                               1111                 :                :      * Private _bt_checkkeys state used to manage "look ahead" optimization
                               1112                 :                :      * (only used during scans with array keys)
                               1113                 :                :      */
                               1114                 :                :     int16       rechecks;
                               1115                 :                :     int16       targetdistance;
                               1116                 :                : 
                               1117                 :                : } BTReadPageState;
                               1118                 :                : 
                               1119                 :                : /*
                               1120                 :                :  * We use some private sk_flags bits in preprocessed scan keys.  We're allowed
                               1121                 :                :  * to use bits 16-31 (see skey.h).  The uppermost bits are copied from the
                               1122                 :                :  * index's indoption[] array entry for the index attribute.
                               1123                 :                :  */
                               1124                 :                : #define SK_BT_REQFWD    0x00010000  /* required to continue forward scan */
                               1125                 :                : #define SK_BT_REQBKWD   0x00020000  /* required to continue backward scan */
                               1126                 :                : #define SK_BT_INDOPTION_SHIFT  24   /* must clear the above bits */
                               1127                 :                : #define SK_BT_DESC          (INDOPTION_DESC << SK_BT_INDOPTION_SHIFT)
                               1128                 :                : #define SK_BT_NULLS_FIRST   (INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT)
                               1129                 :                : 
                               1130                 :                : typedef struct BTOptions
                               1131                 :                : {
                               1132                 :                :     int32       varlena_header_;    /* varlena header (do not touch directly!) */
                               1133                 :                :     int         fillfactor;     /* page fill factor in percent (0..100) */
                               1134                 :                :     float8      vacuum_cleanup_index_scale_factor;  /* deprecated */
                               1135                 :                :     bool        deduplicate_items;  /* Try to deduplicate items? */
                               1136                 :                : } BTOptions;
                               1137                 :                : 
                               1138                 :                : #define BTGetFillFactor(relation) \
                               1139                 :                :     (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \
                               1140                 :                :                  relation->rd_rel->relam == BTREE_AM_OID), \
                               1141                 :                :      (relation)->rd_options ? \
                               1142                 :                :      ((BTOptions *) (relation)->rd_options)->fillfactor : \
                               1143                 :                :      BTREE_DEFAULT_FILLFACTOR)
                               1144                 :                : #define BTGetTargetPageFreeSpace(relation) \
                               1145                 :                :     (BLCKSZ * (100 - BTGetFillFactor(relation)) / 100)
                               1146                 :                : #define BTGetDeduplicateItems(relation) \
                               1147                 :                :     (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \
                               1148                 :                :                  relation->rd_rel->relam == BTREE_AM_OID), \
                               1149                 :                :     ((relation)->rd_options ? \
                               1150                 :                :      ((BTOptions *) (relation)->rd_options)->deduplicate_items : true))
                               1151                 :                : 
                               1152                 :                : /*
                               1153                 :                :  * Constant definition for progress reporting.  Phase numbers must match
                               1154                 :                :  * btbuildphasename.
                               1155                 :                :  */
                               1156                 :                : /* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */
                               1157                 :                : #define PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN       2
                               1158                 :                : #define PROGRESS_BTREE_PHASE_PERFORMSORT_1              3
                               1159                 :                : #define PROGRESS_BTREE_PHASE_PERFORMSORT_2              4
                               1160                 :                : #define PROGRESS_BTREE_PHASE_LEAF_LOAD                  5
                               1161                 :                : 
                               1162                 :                : /*
                               1163                 :                :  * external entry points for btree, in nbtree.c
                               1164                 :                :  */
                               1165                 :                : extern void btbuildempty(Relation index);
                               1166                 :                : extern bool btinsert(Relation rel, Datum *values, bool *isnull,
                               1167                 :                :                      ItemPointer ht_ctid, Relation heapRel,
                               1168                 :                :                      IndexUniqueCheck checkUnique,
                               1169                 :                :                      bool indexUnchanged,
                               1170                 :                :                      struct IndexInfo *indexInfo);
                               1171                 :                : extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys);
                               1172                 :                : extern Size btestimateparallelscan(int nkeys, int norderbys);
                               1173                 :                : extern void btinitparallelscan(void *target);
                               1174                 :                : extern bool btgettuple(IndexScanDesc scan, ScanDirection dir);
                               1175                 :                : extern int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
                               1176                 :                : extern void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
                               1177                 :                :                      ScanKey orderbys, int norderbys);
                               1178                 :                : extern void btparallelrescan(IndexScanDesc scan);
                               1179                 :                : extern void btendscan(IndexScanDesc scan);
                               1180                 :                : extern void btmarkpos(IndexScanDesc scan);
                               1181                 :                : extern void btrestrpos(IndexScanDesc scan);
                               1182                 :                : extern IndexBulkDeleteResult *btbulkdelete(IndexVacuumInfo *info,
                               1183                 :                :                                            IndexBulkDeleteResult *stats,
                               1184                 :                :                                            IndexBulkDeleteCallback callback,
                               1185                 :                :                                            void *callback_state);
                               1186                 :                : extern IndexBulkDeleteResult *btvacuumcleanup(IndexVacuumInfo *info,
                               1187                 :                :                                               IndexBulkDeleteResult *stats);
                               1188                 :                : extern bool btcanreturn(Relation index, int attno);
                               1189                 :                : 
                               1190                 :                : /*
                               1191                 :                :  * prototypes for internal functions in nbtree.c
                               1192                 :                :  */
                               1193                 :                : extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno,
                               1194                 :                :                                bool first);
                               1195                 :                : extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page);
                               1196                 :                : extern void _bt_parallel_done(IndexScanDesc scan);
                               1197                 :                : extern void _bt_parallel_primscan_schedule(IndexScanDesc scan,
                               1198                 :                :                                            BlockNumber prev_scan_page);
                               1199                 :                : 
                               1200                 :                : /*
                               1201                 :                :  * prototypes for functions in nbtdedup.c
                               1202                 :                :  */
                               1203                 :                : extern void _bt_dedup_pass(Relation rel, Buffer buf, IndexTuple newitem,
                               1204                 :                :                            Size newitemsz, bool bottomupdedup);
                               1205                 :                : extern bool _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel,
                               1206                 :                :                                  Size newitemsz);
                               1207                 :                : extern void _bt_dedup_start_pending(BTDedupState state, IndexTuple base,
                               1208                 :                :                                     OffsetNumber baseoff);
                               1209                 :                : extern bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup);
                               1210                 :                : extern Size _bt_dedup_finish_pending(Page newpage, BTDedupState state);
                               1211                 :                : extern IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids,
                               1212                 :                :                                    int nhtids);
                               1213                 :                : extern void _bt_update_posting(BTVacuumPosting vacposting);
                               1214                 :                : extern IndexTuple _bt_swap_posting(IndexTuple newitem, IndexTuple oposting,
                               1215                 :                :                                    int postingoff);
                               1216                 :                : 
                               1217                 :                : /*
                               1218                 :                :  * prototypes for functions in nbtinsert.c
                               1219                 :                :  */
                               1220                 :                : extern bool _bt_doinsert(Relation rel, IndexTuple itup,
                               1221                 :                :                          IndexUniqueCheck checkUnique, bool indexUnchanged,
                               1222                 :                :                          Relation heapRel);
                               1223                 :                : extern void _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf,
                               1224                 :                :                              BTStack stack);
                               1225                 :                : extern Buffer _bt_getstackbuf(Relation rel, Relation heaprel, BTStack stack,
                               1226                 :                :                               BlockNumber child);
                               1227                 :                : 
                               1228                 :                : /*
                               1229                 :                :  * prototypes for functions in nbtsplitloc.c
                               1230                 :                :  */
                               1231                 :                : extern OffsetNumber _bt_findsplitloc(Relation rel, Page origpage,
                               1232                 :                :                                      OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem,
                               1233                 :                :                                      bool *newitemonleft);
                               1234                 :                : 
                               1235                 :                : /*
                               1236                 :                :  * prototypes for functions in nbtpage.c
                               1237                 :                :  */
                               1238                 :                : extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level,
                               1239                 :                :                              bool allequalimage);
                               1240                 :                : extern bool _bt_vacuum_needs_cleanup(Relation rel);
                               1241                 :                : extern void _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages);
                               1242                 :                : extern void _bt_upgrademetapage(Page page);
                               1243                 :                : extern Buffer _bt_getroot(Relation rel, Relation heaprel, int access);
                               1244                 :                : extern Buffer _bt_gettrueroot(Relation rel);
                               1245                 :                : extern int  _bt_getrootheight(Relation rel);
                               1246                 :                : extern void _bt_metaversion(Relation rel, bool *heapkeyspace,
                               1247                 :                :                             bool *allequalimage);
                               1248                 :                : extern void _bt_checkpage(Relation rel, Buffer buf);
                               1249                 :                : extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
                               1250                 :                : extern Buffer _bt_allocbuf(Relation rel, Relation heaprel);
                               1251                 :                : extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
                               1252                 :                :                                BlockNumber blkno, int access);
                               1253                 :                : extern void _bt_relbuf(Relation rel, Buffer buf);
                               1254                 :                : extern void _bt_lockbuf(Relation rel, Buffer buf, int access);
                               1255                 :                : extern void _bt_unlockbuf(Relation rel, Buffer buf);
                               1256                 :                : extern bool _bt_conditionallockbuf(Relation rel, Buffer buf);
                               1257                 :                : extern void _bt_upgradelockbufcleanup(Relation rel, Buffer buf);
                               1258                 :                : extern void _bt_pageinit(Page page, Size size);
                               1259                 :                : extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
                               1260                 :                :                                 OffsetNumber *deletable, int ndeletable,
                               1261                 :                :                                 BTVacuumPosting *updatable, int nupdatable);
                               1262                 :                : extern void _bt_delitems_delete_check(Relation rel, Buffer buf,
                               1263                 :                :                                       Relation heapRel,
                               1264                 :                :                                       TM_IndexDeleteOp *delstate);
                               1265                 :                : extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate);
                               1266                 :                : extern void _bt_pendingfsm_init(Relation rel, BTVacState *vstate,
                               1267                 :                :                                 bool cleanuponly);
                               1268                 :                : extern void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate);
                               1269                 :                : 
                               1270                 :                : /*
                               1271                 :                :  * prototypes for functions in nbtsearch.c
                               1272                 :                :  */
                               1273                 :                : extern BTStack _bt_search(Relation rel, Relation heaprel, BTScanInsert key,
                               1274                 :                :                           Buffer *bufP, int access);
                               1275                 :                : extern Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
                               1276                 :                :                             Buffer buf, bool forupdate, BTStack stack,
                               1277                 :                :                             int access);
                               1278                 :                : extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate);
                               1279                 :                : extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum);
                               1280                 :                : extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
                               1281                 :                : extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
                               1282                 :                : extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost);
                               1283                 :                : 
                               1284                 :                : /*
                               1285                 :                :  * prototypes for functions in nbtutils.c
                               1286                 :                :  */
                               1287                 :                : extern BTScanInsert _bt_mkscankey(Relation rel, IndexTuple itup);
                               1288                 :                : extern void _bt_freestack(BTStack stack);
                               1289                 :                : extern bool _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir);
                               1290                 :                : extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
                               1291                 :                : extern void _bt_preprocess_keys(IndexScanDesc scan);
                               1292                 :                : extern bool _bt_checkkeys(IndexScanDesc scan, BTReadPageState *pstate, bool arrayKeys,
                               1293                 :                :                           IndexTuple tuple, int tupnatts);
                               1294                 :                : extern void _bt_killitems(IndexScanDesc scan);
                               1295                 :                : extern BTCycleId _bt_vacuum_cycleid(Relation rel);
                               1296                 :                : extern BTCycleId _bt_start_vacuum(Relation rel);
                               1297                 :                : extern void _bt_end_vacuum(Relation rel);
                               1298                 :                : extern void _bt_end_vacuum_callback(int code, Datum arg);
                               1299                 :                : extern Size BTreeShmemSize(void);
                               1300                 :                : extern void BTreeShmemInit(void);
                               1301                 :                : extern bytea *btoptions(Datum reloptions, bool validate);
                               1302                 :                : extern bool btproperty(Oid index_oid, int attno,
                               1303                 :                :                        IndexAMProperty prop, const char *propname,
                               1304                 :                :                        bool *res, bool *isnull);
                               1305                 :                : extern char *btbuildphasename(int64 phasenum);
                               1306                 :                : extern IndexTuple _bt_truncate(Relation rel, IndexTuple lastleft,
                               1307                 :                :                                IndexTuple firstright, BTScanInsert itup_key);
                               1308                 :                : extern int  _bt_keep_natts_fast(Relation rel, IndexTuple lastleft,
                               1309                 :                :                                 IndexTuple firstright);
                               1310                 :                : extern bool _bt_check_natts(Relation rel, bool heapkeyspace, Page page,
                               1311                 :                :                             OffsetNumber offnum);
                               1312                 :                : extern void _bt_check_third_page(Relation rel, Relation heap,
                               1313                 :                :                                  bool needheaptidspace, Page page, IndexTuple newtup);
                               1314                 :                : extern bool _bt_allequalimage(Relation rel, bool debugmessage);
                               1315                 :                : 
                               1316                 :                : /*
                               1317                 :                :  * prototypes for functions in nbtvalidate.c
                               1318                 :                :  */
                               1319                 :                : extern bool btvalidate(Oid opclassoid);
                               1320                 :                : extern void btadjustmembers(Oid opfamilyoid,
                               1321                 :                :                             Oid opclassoid,
                               1322                 :                :                             List *operators,
                               1323                 :                :                             List *functions);
                               1324                 :                : 
                               1325                 :                : /*
                               1326                 :                :  * prototypes for functions in nbtsort.c
                               1327                 :                :  */
                               1328                 :                : extern IndexBuildResult *btbuild(Relation heap, Relation index,
                               1329                 :                :                                  struct IndexInfo *indexInfo);
                               1330                 :                : extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc);
                               1331                 :                : 
                               1332                 :                : #endif                          /* NBTREE_H */
        

Generated by: LCOV version 2.1-beta2-3-g6141622