LCOV - differential code coverage report
Current view: top level - src/include/access - nbtree.h (source / functions) Coverage Total Hit UIC GBC GIC GNC CBC ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 98.9 % 94 93 1 1 44 2 46 45 2
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 17 17 16 1 17
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 2 2 2
Legend: Lines: hit not hit (240..) days: 98.9 % 92 91 1 1 44 46 45
Function coverage date bins:
[..60] days: 100.0 % 1 1 1
(240..) days: 48.5 % 33 16 16 17

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * nbtree.h
                                  4                 :  *    header file for postgres btree access method implementation.
                                  5                 :  *
                                  6                 :  *
                                  7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  8                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  9                 :  *
                                 10                 :  * src/include/access/nbtree.h
                                 11                 :  *
                                 12                 :  *-------------------------------------------------------------------------
                                 13                 :  */
                                 14                 : #ifndef NBTREE_H
                                 15                 : #define NBTREE_H
                                 16                 : 
                                 17                 : #include "access/amapi.h"
                                 18                 : #include "access/itup.h"
                                 19                 : #include "access/sdir.h"
                                 20                 : #include "access/tableam.h"
                                 21                 : #include "access/xlogreader.h"
                                 22                 : #include "catalog/pg_am_d.h"
                                 23                 : #include "catalog/pg_index.h"
                                 24                 : #include "lib/stringinfo.h"
                                 25                 : #include "storage/bufmgr.h"
                                 26                 : #include "storage/shm_toc.h"
                                 27                 : 
                                 28                 : /* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */
                                 29                 : typedef uint16 BTCycleId;
                                 30                 : 
                                 31                 : /*
                                 32                 :  *  BTPageOpaqueData -- At the end of every page, we store a pointer
                                 33                 :  *  to both siblings in the tree.  This is used to do forward/backward
                                 34                 :  *  index scans.  The next-page link is also critical for recovery when
                                 35                 :  *  a search has navigated to the wrong page due to concurrent page splits
                                 36                 :  *  or deletions; see src/backend/access/nbtree/README for more info.
                                 37                 :  *
                                 38                 :  *  In addition, we store the page's btree level (counting upwards from
                                 39                 :  *  zero at a leaf page) as well as some flag bits indicating the page type
                                 40                 :  *  and status.  If the page is deleted, a BTDeletedPageData struct is stored
                                 41                 :  *  in the page's tuple area, while a standard BTPageOpaqueData struct is
                                 42                 :  *  stored in the page special area.
                                 43                 :  *
                                 44                 :  *  We also store a "vacuum cycle ID".  When a page is split while VACUUM is
                                 45                 :  *  processing the index, a nonzero value associated with the VACUUM run is
                                 46                 :  *  stored into both halves of the split page.  (If VACUUM is not running,
                                 47                 :  *  both pages receive zero cycleids.)  This allows VACUUM to detect whether
                                 48                 :  *  a page was split since it started, with a small probability of false match
                                 49                 :  *  if the page was last split some exact multiple of MAX_BT_CYCLE_ID VACUUMs
                                 50                 :  *  ago.  Also, during a split, the BTP_SPLIT_END flag is cleared in the left
                                 51                 :  *  (original) page, and set in the right page, but only if the next page
                                 52                 :  *  to its right has a different cycleid.
                                 53                 :  *
                                 54                 :  *  NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested
                                 55                 :  *  instead.
                                 56                 :  *
                                 57                 :  *  NOTE: the btpo_level field used to be a union type in order to allow
                                 58                 :  *  deleted pages to store a 32-bit safexid in the same field.  We now store
                                 59                 :  *  64-bit/full safexid values using BTDeletedPageData instead.
                                 60                 :  */
                                 61                 : 
                                 62                 : typedef struct BTPageOpaqueData
                                 63                 : {
                                 64                 :     BlockNumber btpo_prev;      /* left sibling, or P_NONE if leftmost */
                                 65                 :     BlockNumber btpo_next;      /* right sibling, or P_NONE if rightmost */
                                 66                 :     uint32      btpo_level;     /* tree level --- zero for leaf pages */
                                 67                 :     uint16      btpo_flags;     /* flag bits, see below */
                                 68                 :     BTCycleId   btpo_cycleid;   /* vacuum cycle ID of latest split */
                                 69                 : } BTPageOpaqueData;
                                 70                 : 
                                 71                 : typedef BTPageOpaqueData *BTPageOpaque;
                                 72                 : 
                                 73                 : #define BTPageGetOpaque(page) ((BTPageOpaque) PageGetSpecialPointer(page))
                                 74                 : 
                                 75                 : /* Bits defined in btpo_flags */
                                 76                 : #define BTP_LEAF        (1 << 0)  /* leaf page, i.e. not internal page */
                                 77                 : #define BTP_ROOT        (1 << 1)  /* root page (has no parent) */
                                 78                 : #define BTP_DELETED     (1 << 2)  /* page has been deleted from tree */
                                 79                 : #define BTP_META        (1 << 3)  /* meta-page */
                                 80                 : #define BTP_HALF_DEAD   (1 << 4)  /* empty, but still in tree */
                                 81                 : #define BTP_SPLIT_END   (1 << 5)  /* rightmost page of split group */
                                 82                 : #define BTP_HAS_GARBAGE (1 << 6)  /* page has LP_DEAD tuples (deprecated) */
                                 83                 : #define BTP_INCOMPLETE_SPLIT (1 << 7) /* right sibling's downlink is missing */
                                 84                 : #define BTP_HAS_FULLXID (1 << 8)  /* contains BTDeletedPageData */
                                 85                 : 
                                 86                 : /*
                                 87                 :  * The max allowed value of a cycle ID is a bit less than 64K.  This is
                                 88                 :  * for convenience of pg_filedump and similar utilities: we want to use
                                 89                 :  * the last 2 bytes of special space as an index type indicator, and
                                 90                 :  * restricting cycle ID lets btree use that space for vacuum cycle IDs
                                 91                 :  * while still allowing index type to be identified.
                                 92                 :  */
                                 93                 : #define MAX_BT_CYCLE_ID     0xFF7F
                                 94                 : 
                                 95                 : 
                                 96                 : /*
                                 97                 :  * The Meta page is always the first page in the btree index.
                                 98                 :  * Its primary purpose is to point to the location of the btree root page.
                                 99                 :  * We also point to the "fast" root, which is the current effective root;
                                100                 :  * see README for discussion.
                                101                 :  */
                                102                 : 
                                103                 : typedef struct BTMetaPageData
                                104                 : {
                                105                 :     uint32      btm_magic;      /* should contain BTREE_MAGIC */
                                106                 :     uint32      btm_version;    /* nbtree version (always <= BTREE_VERSION) */
                                107                 :     BlockNumber btm_root;       /* current root location */
                                108                 :     uint32      btm_level;      /* tree level of the root page */
                                109                 :     BlockNumber btm_fastroot;   /* current "fast" root location */
                                110                 :     uint32      btm_fastlevel;  /* tree level of the "fast" root page */
                                111                 :     /* remaining fields only valid when btm_version >= BTREE_NOVAC_VERSION */
                                112                 : 
                                113                 :     /* number of deleted, non-recyclable pages during last cleanup */
                                114                 :     uint32      btm_last_cleanup_num_delpages;
                                115                 :     /* number of heap tuples during last cleanup (deprecated) */
                                116                 :     float8      btm_last_cleanup_num_heap_tuples;
                                117                 : 
                                118                 :     bool        btm_allequalimage;  /* are all columns "equalimage"? */
                                119                 : } BTMetaPageData;
                                120                 : 
                                121                 : #define BTPageGetMeta(p) \
                                122                 :     ((BTMetaPageData *) PageGetContents(p))
                                123                 : 
                                124                 : /*
                                125                 :  * The current Btree version is 4.  That's what you'll get when you create
                                126                 :  * a new index.
                                127                 :  *
                                128                 :  * Btree version 3 was used in PostgreSQL v11.  It is mostly the same as
                                129                 :  * version 4, but heap TIDs were not part of the keyspace.  Index tuples
                                130                 :  * with duplicate keys could be stored in any order.  We continue to
                                131                 :  * support reading and writing Btree versions 2 and 3, so that they don't
                                132                 :  * need to be immediately re-indexed at pg_upgrade.  In order to get the
                                133                 :  * new heapkeyspace semantics, however, a REINDEX is needed.
                                134                 :  *
                                135                 :  * Deduplication is safe to use when the btm_allequalimage field is set to
                                136                 :  * true.  It's safe to read the btm_allequalimage field on version 3, but
                                137                 :  * only version 4 indexes make use of deduplication.  Even version 4
                                138                 :  * indexes created on PostgreSQL v12 will need a REINDEX to make use of
                                139                 :  * deduplication, though, since there is no other way to set
                                140                 :  * btm_allequalimage to true (pg_upgrade hasn't been taught to set the
                                141                 :  * metapage field).
                                142                 :  *
                                143                 :  * Btree version 2 is mostly the same as version 3.  There are two new
                                144                 :  * fields in the metapage that were introduced in version 3.  A version 2
                                145                 :  * metapage will be automatically upgraded to version 3 on the first
                                146                 :  * insert to it.  INCLUDE indexes cannot use version 2.
                                147                 :  */
                                148                 : #define BTREE_METAPAGE  0       /* first page is meta */
                                149                 : #define BTREE_MAGIC     0x053162    /* magic number in metapage */
                                150                 : #define BTREE_VERSION   4       /* current version number */
                                151                 : #define BTREE_MIN_VERSION   2   /* minimum supported version */
                                152                 : #define BTREE_NOVAC_VERSION 3   /* version with all meta fields set */
                                153                 : 
                                154                 : /*
                                155                 :  * Maximum size of a btree index entry, including its tuple header.
                                156                 :  *
                                157                 :  * We actually need to be able to fit three items on every page,
                                158                 :  * so restrict any one item to 1/3 the per-page available space.
                                159                 :  *
                                160                 :  * There are rare cases where _bt_truncate() will need to enlarge
                                161                 :  * a heap index tuple to make space for a tiebreaker heap TID
                                162                 :  * attribute, which we account for here.
                                163                 :  */
                                164                 : #define BTMaxItemSize(page) \
                                165                 :     (MAXALIGN_DOWN((PageGetPageSize(page) - \
                                166                 :                     MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
                                167                 :                     MAXALIGN(sizeof(BTPageOpaqueData))) / 3) - \
                                168                 :                     MAXALIGN(sizeof(ItemPointerData)))
                                169                 : #define BTMaxItemSizeNoHeapTid(page) \
                                170                 :     MAXALIGN_DOWN((PageGetPageSize(page) - \
                                171                 :                    MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
                                172                 :                    MAXALIGN(sizeof(BTPageOpaqueData))) / 3)
                                173                 : 
                                174                 : /*
                                175                 :  * MaxTIDsPerBTreePage is an upper bound on the number of heap TIDs tuples
                                176                 :  * that may be stored on a btree leaf page.  It is used to size the
                                177                 :  * per-page temporary buffers.
                                178                 :  *
                                179                 :  * Note: we don't bother considering per-tuple overheads here to keep
                                180                 :  * things simple (value is based on how many elements a single array of
                                181                 :  * heap TIDs must have to fill the space between the page header and
                                182                 :  * special area).  The value is slightly higher (i.e. more conservative)
                                183                 :  * than necessary as a result, which is considered acceptable.
                                184                 :  */
                                185                 : #define MaxTIDsPerBTreePage \
                                186                 :     (int) ((BLCKSZ - SizeOfPageHeaderData - sizeof(BTPageOpaqueData)) / \
                                187                 :            sizeof(ItemPointerData))
                                188                 : 
                                189                 : /*
                                190                 :  * The leaf-page fillfactor defaults to 90% but is user-adjustable.
                                191                 :  * For pages above the leaf level, we use a fixed 70% fillfactor.
                                192                 :  * The fillfactor is applied during index build and when splitting
                                193                 :  * a rightmost page; when splitting non-rightmost pages we try to
                                194                 :  * divide the data equally.  When splitting a page that's entirely
                                195                 :  * filled with a single value (duplicates), the effective leaf-page
                                196                 :  * fillfactor is 96%, regardless of whether the page is a rightmost
                                197                 :  * page.
                                198                 :  */
                                199                 : #define BTREE_MIN_FILLFACTOR        10
                                200                 : #define BTREE_DEFAULT_FILLFACTOR    90
                                201                 : #define BTREE_NONLEAF_FILLFACTOR    70
                                202                 : #define BTREE_SINGLEVAL_FILLFACTOR  96
                                203                 : 
                                204                 : /*
                                205                 :  *  In general, the btree code tries to localize its knowledge about
                                206                 :  *  page layout to a couple of routines.  However, we need a special
                                207                 :  *  value to indicate "no page number" in those places where we expect
                                208                 :  *  page numbers.  We can use zero for this because we never need to
                                209                 :  *  make a pointer to the metadata page.
                                210                 :  */
                                211                 : 
                                212                 : #define P_NONE          0
                                213                 : 
                                214                 : /*
                                215                 :  * Macros to test whether a page is leftmost or rightmost on its tree level,
                                216                 :  * as well as other state info kept in the opaque data.
                                217                 :  */
                                218                 : #define P_LEFTMOST(opaque)      ((opaque)->btpo_prev == P_NONE)
                                219                 : #define P_RIGHTMOST(opaque)     ((opaque)->btpo_next == P_NONE)
                                220                 : #define P_ISLEAF(opaque)        (((opaque)->btpo_flags & BTP_LEAF) != 0)
                                221                 : #define P_ISROOT(opaque)        (((opaque)->btpo_flags & BTP_ROOT) != 0)
                                222                 : #define P_ISDELETED(opaque)     (((opaque)->btpo_flags & BTP_DELETED) != 0)
                                223                 : #define P_ISMETA(opaque)        (((opaque)->btpo_flags & BTP_META) != 0)
                                224                 : #define P_ISHALFDEAD(opaque)    (((opaque)->btpo_flags & BTP_HALF_DEAD) != 0)
                                225                 : #define P_IGNORE(opaque)        (((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) != 0)
                                226                 : #define P_HAS_GARBAGE(opaque)   (((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0)
                                227                 : #define P_INCOMPLETE_SPLIT(opaque)  (((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0)
                                228                 : #define P_HAS_FULLXID(opaque)   (((opaque)->btpo_flags & BTP_HAS_FULLXID) != 0)
                                229                 : 
                                230                 : /*
                                231                 :  * BTDeletedPageData is the page contents of a deleted page
                                232                 :  */
                                233                 : typedef struct BTDeletedPageData
                                234                 : {
                                235                 :     FullTransactionId safexid;  /* See BTPageIsRecyclable() */
                                236                 : } BTDeletedPageData;
                                237                 : 
  774 pg                        238 ECB             : static inline void
  774 pg                        239 GIC        3420 : BTPageSetDeleted(Page page, FullTransactionId safexid)
                                240                 : {
                                241                 :     BTPageOpaque opaque;
                                242                 :     PageHeader  header;
                                243                 :     BTDeletedPageData *contents;
  774 pg                        244 ECB             : 
  373 michael                   245 CBC        3420 :     opaque = BTPageGetOpaque(page);
  774 pg                        246 GIC        3420 :     header = ((PageHeader) page);
  774 pg                        247 ECB             : 
  774 pg                        248 CBC        3420 :     opaque->btpo_flags &= ~BTP_HALF_DEAD;
                                249            3420 :     opaque->btpo_flags |= BTP_DELETED | BTP_HAS_FULLXID;
  774 pg                        250 GIC        3420 :     header->pd_lower = MAXALIGN(SizeOfPageHeaderData) +
  774 pg                        251 ECB             :         sizeof(BTDeletedPageData);
  774 pg                        252 GIC        3420 :     header->pd_upper = header->pd_special;
                                253                 : 
  774 pg                        254 ECB             :     /* Set safexid in deleted page */
  774 pg                        255 CBC        3420 :     contents = ((BTDeletedPageData *) PageGetContents(page));
                                256            3420 :     contents->safexid = safexid;
  774 pg                        257 GIC        3420 : }
                                258                 : 
  774 pg                        259 ECB             : static inline FullTransactionId
  774 pg                        260 GIC         721 : BTPageGetDeleteXid(Page page)
                                261                 : {
                                262                 :     BTPageOpaque opaque;
                                263                 :     BTDeletedPageData *contents;
                                264                 : 
  774 pg                        265 ECB             :     /* We only expect to be called with a deleted page */
  774 pg                        266 CBC         721 :     Assert(!PageIsNew(page));
  373 michael                   267             721 :     opaque = BTPageGetOpaque(page);
  774 pg                        268 GIC         721 :     Assert(P_ISDELETED(opaque));
                                269                 : 
  774 pg                        270 ECB             :     /* pg_upgrade'd deleted page -- must be safe to delete now */
  774 pg                        271 GBC         721 :     if (!P_HAS_FULLXID(opaque))
  774 pg                        272 UIC           0 :         return FirstNormalFullTransactionId;
                                273                 : 
  774 pg                        274 ECB             :     /* Get safexid from deleted page */
  774 pg                        275 CBC         721 :     contents = ((BTDeletedPageData *) PageGetContents(page));
  774 pg                        276 GIC         721 :     return contents->safexid;
                                277                 : }
                                278                 : 
                                279                 : /*
                                280                 :  * Is an existing page recyclable?
                                281                 :  *
                                282                 :  * This exists to centralize the policy on which deleted pages are now safe to
                                283                 :  * re-use.  However, _bt_pendingfsm_finalize() duplicates some of the same
                                284                 :  * logic because it doesn't work directly with pages -- keep the two in sync.
                                285                 :  *
                                286                 :  * Note: PageIsNew() pages are always safe to recycle, but we can't deal with
                                287                 :  * them here (caller is responsible for that case themselves).  Caller might
                                288                 :  * well need special handling for new pages anyway.
                                289                 :  */
  774 pg                        290 ECB             : static inline bool
    6 pg                        291 GNC       33197 : BTPageIsRecyclable(Page page, Relation heaprel)
                                292                 : {
                                293                 :     BTPageOpaque opaque;
  774 pg                        294 ECB             : 
  774 pg                        295 GIC       33197 :     Assert(!PageIsNew(page));
                                296                 : 
  774 pg                        297 ECB             :     /* Recycling okay iff page is deleted and safexid is old enough */
  373 michael                   298 CBC       33197 :     opaque = BTPageGetOpaque(page);
  774 pg                        299 GIC       33197 :     if (P_ISDELETED(opaque))
                                300                 :     {
                                301                 :         /*
                                302                 :          * The page was deleted, but when? If it was just deleted, a scan
                                303                 :          * might have seen the downlink to it, and will read the page later.
                                304                 :          * As long as that can happen, we must keep the deleted page around as
                                305                 :          * a tombstone.
                                306                 :          *
                                307                 :          * For that check if the deletion XID could still be visible to
                                308                 :          * anyone. If not, then no scan that's still in progress could have
                                309                 :          * seen its downlink, and we can recycle it.
                                310                 :          */
    6 pg                        311 GNC         493 :         return GlobalVisCheckRemovableFullXid(heaprel, BTPageGetDeleteXid(page));
                                312                 :     }
                                313                 : 
  774 pg                        314 GIC       32704 :     return false;
                                315                 : }
                                316                 : 
                                317                 : /*
                                318                 :  * BTVacState and BTPendingFSM are private nbtree.c state used during VACUUM.
                                319                 :  * They are exported for use by page deletion related code in nbtpage.c.
                                320                 :  */
                                321                 : typedef struct BTPendingFSM
                                322                 : {
                                323                 :     BlockNumber target;         /* Page deleted by current VACUUM */
                                324                 :     FullTransactionId safexid;  /* Page's BTDeletedPageData.safexid */
                                325                 : } BTPendingFSM;
                                326                 : 
                                327                 : typedef struct BTVacState
                                328                 : {
                                329                 :     IndexVacuumInfo *info;
                                330                 :     IndexBulkDeleteResult *stats;
                                331                 :     IndexBulkDeleteCallback callback;
                                332                 :     void       *callback_state;
                                333                 :     BTCycleId   cycleid;
                                334                 :     MemoryContext pagedelcontext;
                                335                 : 
                                336                 :     /*
                                337                 :      * _bt_pendingfsm_finalize() state
                                338                 :      */
                                339                 :     int         bufsize;        /* pendingpages space (in # elements) */
                                340                 :     int         maxbufsize;     /* max bufsize that respects work_mem */
                                341                 :     BTPendingFSM *pendingpages; /* One entry per newly deleted page */
                                342                 :     int         npendingpages;  /* current # valid pendingpages */
                                343                 : } BTVacState;
                                344                 : 
                                345                 : /*
                                346                 :  *  Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
                                347                 :  *  page.  The high key is not a tuple that is used to visit the heap.  It is
                                348                 :  *  a pivot tuple (see "Notes on B-Tree tuple format" below for definition).
                                349                 :  *  The high key on a page is required to be greater than or equal to any
                                350                 :  *  other key that appears on the page.  If we find ourselves trying to
                                351                 :  *  insert a key that is strictly > high key, we know we need to move right
                                352                 :  *  (this should only happen if the page was split since we examined the
                                353                 :  *  parent page).
                                354                 :  *
                                355                 :  *  Our insertion algorithm guarantees that we can use the initial least key
                                356                 :  *  on our right sibling as the high key.  Once a page is created, its high
                                357                 :  *  key changes only if the page is split.
                                358                 :  *
                                359                 :  *  On a non-rightmost page, the high key lives in item 1 and data items
                                360                 :  *  start in item 2.  Rightmost pages have no high key, so we store data
                                361                 :  *  items beginning in item 1.
                                362                 :  */
                                363                 : 
                                364                 : #define P_HIKEY             ((OffsetNumber) 1)
                                365                 : #define P_FIRSTKEY          ((OffsetNumber) 2)
                                366                 : #define P_FIRSTDATAKEY(opaque)  (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
                                367                 : 
                                368                 : /*
                                369                 :  * Notes on B-Tree tuple format, and key and non-key attributes:
                                370                 :  *
                                371                 :  * INCLUDE B-Tree indexes have non-key attributes.  These are extra
                                372                 :  * attributes that may be returned by index-only scans, but do not influence
                                373                 :  * the order of items in the index (formally, non-key attributes are not
                                374                 :  * considered to be part of the key space).  Non-key attributes are only
                                375                 :  * present in leaf index tuples whose item pointers actually point to heap
                                376                 :  * tuples (non-pivot tuples).  _bt_check_natts() enforces the rules
                                377                 :  * described here.
                                378                 :  *
                                379                 :  * Non-pivot tuple format (plain/non-posting variant):
                                380                 :  *
                                381                 :  *  t_tid | t_info | key values | INCLUDE columns, if any
                                382                 :  *
                                383                 :  * t_tid points to the heap TID, which is a tiebreaker key column as of
                                384                 :  * BTREE_VERSION 4.
                                385                 :  *
                                386                 :  * Non-pivot tuples complement pivot tuples, which only have key columns.
                                387                 :  * The sole purpose of pivot tuples is to represent how the key space is
                                388                 :  * separated.  In general, any B-Tree index that has more than one level
                                389                 :  * (i.e. any index that does not just consist of a metapage and a single
                                390                 :  * leaf root page) must have some number of pivot tuples, since pivot
                                391                 :  * tuples are used for traversing the tree.  Suffix truncation can omit
                                392                 :  * trailing key columns when a new pivot is formed, which makes minus
                                393                 :  * infinity their logical value.  Since BTREE_VERSION 4 indexes treat heap
                                394                 :  * TID as a trailing key column that ensures that all index tuples are
                                395                 :  * physically unique, it is necessary to represent heap TID as a trailing
                                396                 :  * key column in pivot tuples, though very often this can be truncated
                                397                 :  * away, just like any other key column. (Actually, the heap TID is
                                398                 :  * omitted rather than truncated, since its representation is different to
                                399                 :  * the non-pivot representation.)
                                400                 :  *
                                401                 :  * Pivot tuple format:
                                402                 :  *
                                403                 :  *  t_tid | t_info | key values | [heap TID]
                                404                 :  *
                                405                 :  * We store the number of columns present inside pivot tuples by abusing
                                406                 :  * their t_tid offset field, since pivot tuples never need to store a real
                                407                 :  * offset (pivot tuples generally store a downlink in t_tid, though).  The
                                408                 :  * offset field only stores the number of columns/attributes when the
                                409                 :  * INDEX_ALT_TID_MASK bit is set, which doesn't count the trailing heap
                                410                 :  * TID column sometimes stored in pivot tuples -- that's represented by
                                411                 :  * the presence of BT_PIVOT_HEAP_TID_ATTR.  The INDEX_ALT_TID_MASK bit in
                                412                 :  * t_info is always set on BTREE_VERSION 4 pivot tuples, since
                                413                 :  * BTreeTupleIsPivot() must work reliably on heapkeyspace versions.
                                414                 :  *
                                415                 :  * In version 2 or version 3 (!heapkeyspace) indexes, INDEX_ALT_TID_MASK
                                416                 :  * might not be set in pivot tuples.  BTreeTupleIsPivot() won't work
                                417                 :  * reliably as a result.  The number of columns stored is implicitly the
                                418                 :  * same as the number of columns in the index, just like any non-pivot
                                419                 :  * tuple. (The number of columns stored should not vary, since suffix
                                420                 :  * truncation of key columns is unsafe within any !heapkeyspace index.)
                                421                 :  *
                                422                 :  * The 12 least significant bits from t_tid's offset number are used to
                                423                 :  * represent the number of key columns within a pivot tuple.  This leaves 4
                                424                 :  * status bits (BT_STATUS_OFFSET_MASK bits), which are shared by all tuples
                                425                 :  * that have the INDEX_ALT_TID_MASK bit set (set in t_info) to store basic
                                426                 :  * tuple metadata.  BTreeTupleIsPivot() and BTreeTupleIsPosting() use the
                                427                 :  * BT_STATUS_OFFSET_MASK bits.
                                428                 :  *
                                429                 :  * Sometimes non-pivot tuples also use a representation that repurposes
                                430                 :  * t_tid to store metadata rather than a TID.  PostgreSQL v13 introduced a
                                431                 :  * new non-pivot tuple format to support deduplication: posting list
                                432                 :  * tuples.  Deduplication merges together multiple equal non-pivot tuples
                                433                 :  * into a logically equivalent, space efficient representation.  A posting
                                434                 :  * list is an array of ItemPointerData elements.  Non-pivot tuples are
                                435                 :  * merged together to form posting list tuples lazily, at the point where
                                436                 :  * we'd otherwise have to split a leaf page.
                                437                 :  *
                                438                 :  * Posting tuple format (alternative non-pivot tuple representation):
                                439                 :  *
                                440                 :  *  t_tid | t_info | key values | posting list (TID array)
                                441                 :  *
                                442                 :  * Posting list tuples are recognized as such by having the
                                443                 :  * INDEX_ALT_TID_MASK status bit set in t_info and the BT_IS_POSTING status
                                444                 :  * bit set in t_tid's offset number.  These flags redefine the content of
                                445                 :  * the posting tuple's t_tid to store the location of the posting list
                                446                 :  * (instead of a block number), as well as the total number of heap TIDs
                                447                 :  * present in the tuple (instead of a real offset number).
                                448                 :  *
                                449                 :  * The 12 least significant bits from t_tid's offset number are used to
                                450                 :  * represent the number of heap TIDs present in the tuple, leaving 4 status
                                451                 :  * bits (the BT_STATUS_OFFSET_MASK bits).  Like any non-pivot tuple, the
                                452                 :  * number of columns stored is always implicitly the total number in the
                                453                 :  * index (in practice there can never be non-key columns stored, since
                                454                 :  * deduplication is not supported with INCLUDE indexes).
                                455                 :  */
                                456                 : #define INDEX_ALT_TID_MASK          INDEX_AM_RESERVED_BIT
                                457                 : 
                                458                 : /* Item pointer offset bit masks */
                                459                 : #define BT_OFFSET_MASK              0x0FFF
                                460                 : #define BT_STATUS_OFFSET_MASK       0xF000
                                461                 : /* BT_STATUS_OFFSET_MASK status bits */
                                462                 : #define BT_PIVOT_HEAP_TID_ATTR      0x1000
                                463                 : #define BT_IS_POSTING               0x2000
                                464                 : 
                                465                 : /*
                                466                 :  * Mask allocated for number of keys in index tuple must be able to fit
                                467                 :  * maximum possible number of index attributes
                                468                 :  */
                                469                 : StaticAssertDecl(BT_OFFSET_MASK >= INDEX_MAX_KEYS,
                                470                 :                  "BT_OFFSET_MASK can't fit INDEX_MAX_KEYS");
                                471                 : 
                                472                 : /*
                                473                 :  * Note: BTreeTupleIsPivot() can have false negatives (but not false
                                474                 :  * positives) when used with !heapkeyspace indexes
                                475                 :  */
                                476                 : static inline bool
 1138                           477       710208502 : BTreeTupleIsPivot(IndexTuple itup)
                                478                 : {
 1138 pg                        479 CBC   710208502 :     if ((itup->t_info & INDEX_ALT_TID_MASK) == 0)
 1138 pg                        480 GIC   484022165 :         return false;
 1138 pg                        481 ECB             :     /* absence of BT_IS_POSTING in offset number indicates pivot tuple */
 1138 pg                        482 CBC   226186337 :     if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) != 0)
 1138 pg                        483 GIC    11241898 :         return false;
 1138 pg                        484 ECB             : 
 1138 pg                        485 CBC   214944439 :     return true;
                                486                 : }
 1138 pg                        487 ECB             : 
                                488                 : static inline bool
 1138 pg                        489 GIC   574156255 : BTreeTupleIsPosting(IndexTuple itup)
                                490                 : {
 1138 pg                        491 CBC   574156255 :     if ((itup->t_info & INDEX_ALT_TID_MASK) == 0)
 1138 pg                        492 GIC   363628961 :         return false;
 1138 pg                        493 ECB             :     /* presence of BT_IS_POSTING in offset number indicates posting tuple */
 1138 pg                        494 CBC   210527294 :     if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) & BT_IS_POSTING) == 0)
 1138 pg                        495 GIC   155203594 :         return false;
 1138 pg                        496 ECB             : 
 1138 pg                        497 CBC    55323700 :     return true;
                                498                 : }
 1138 pg                        499 ECB             : 
                                500                 : static inline void
 1082 pg                        501 GIC      378731 : BTreeTupleSetPosting(IndexTuple itup, uint16 nhtids, int postingoffset)
                                502                 : {
 1082 pg                        503 CBC      378731 :     Assert(nhtids > 1);
 1082 pg                        504 GIC      378731 :     Assert((nhtids & BT_STATUS_OFFSET_MASK) == 0);
 1133 pg                        505 CBC      378731 :     Assert((size_t) postingoffset == MAXALIGN(postingoffset));
 1138                           506          378731 :     Assert(postingoffset < INDEX_SIZE_MASK);
 1082                           507          378731 :     Assert(!BTreeTupleIsPivot(itup));
 1138 pg                        508 ECB             : 
 1138 pg                        509 CBC      378731 :     itup->t_info |= INDEX_ALT_TID_MASK;
 1138 pg                        510 GIC      378731 :     ItemPointerSetOffsetNumber(&itup->t_tid, (nhtids | BT_IS_POSTING));
 1138 pg                        511 CBC      378731 :     ItemPointerSetBlockNumber(&itup->t_tid, postingoffset);
                                512          378731 : }
 1138 pg                        513 ECB             : 
                                514                 : static inline uint16
 1138 pg                        515 GIC    18767101 : BTreeTupleGetNPosting(IndexTuple posting)
                                516                 : {
 1138 pg                        517 ECB             :     OffsetNumber existing;
                                518                 : 
 1138 pg                        519 GIC    18767101 :     Assert(BTreeTupleIsPosting(posting));
                                520                 : 
 1138 pg                        521 CBC    18767101 :     existing = ItemPointerGetOffsetNumberNoCheck(&posting->t_tid);
 1138 pg                        522 GIC    18767101 :     return (existing & BT_OFFSET_MASK);
 1138 pg                        523 ECB             : }
 1816 teodor                    524                 : 
                                525                 : static inline uint32
 1138 pg                        526 GIC    21080154 : BTreeTupleGetPostingOffset(IndexTuple posting)
                                527                 : {
 1138 pg                        528 CBC    21080154 :     Assert(BTreeTupleIsPosting(posting));
                                529                 : 
                                530        21080154 :     return ItemPointerGetBlockNumberNoCheck(&posting->t_tid);
                                531                 : }
 1138 pg                        532 ECB             : 
                                533                 : static inline ItemPointer
 1138 pg                        534 GIC    18895118 : BTreeTupleGetPosting(IndexTuple posting)
                                535                 : {
 1138 pg                        536 CBC    37790236 :     return (ItemPointer) ((char *) posting +
 1138 pg                        537 GIC    18895118 :                           BTreeTupleGetPostingOffset(posting));
 1138 pg                        538 ECB             : }
                                539                 : 
                                540                 : static inline ItemPointer
 1138 pg                        541 GIC    15471349 : BTreeTupleGetPostingN(IndexTuple posting, int n)
                                542                 : {
 1138 pg                        543 CBC    15471349 :     return BTreeTupleGetPosting(posting) + n;
                                544                 : }
 1828 teodor                    545 ECB             : 
                                546                 : /*
                                547                 :  * Get/set downlink block number in pivot tuple.
                                548                 :  *
                                549                 :  * Note: Cannot assert that tuple is a pivot tuple.  If we did so then
                                550                 :  * !heapkeyspace indexes would exhibit false positive assertion failures.
                                551                 :  */
                                552                 : static inline BlockNumber
 1138 pg                        553 GIC    12322514 : BTreeTupleGetDownLink(IndexTuple pivot)
                                554                 : {
 1138 pg                        555 CBC    12322514 :     return ItemPointerGetBlockNumberNoCheck(&pivot->t_tid);
                                556                 : }
 1138 pg                        557 ECB             : 
                                558                 : static inline void
 1138 pg                        559 GIC       72893 : BTreeTupleSetDownLink(IndexTuple pivot, BlockNumber blkno)
                                560                 : {
 1138 pg                        561 CBC       72893 :     ItemPointerSetBlockNumber(&pivot->t_tid, blkno);
 1138 pg                        562 GIC       72893 : }
 1812 teodor                    563 ECB             : 
 1816                           564                 : /*
                                565                 :  * Get number of attributes within tuple.
                                566                 :  *
                                567                 :  * Note that this does not include an implicit tiebreaker heap TID
                                568                 :  * attribute, if any.  Note also that the number of key attributes must be
                                569                 :  * explicitly represented in all heapkeyspace pivot tuples.
                                570                 :  *
                                571                 :  * Note: This is defined as a macro rather than an inline function to
                                572                 :  * avoid including rel.h.
                                573                 :  */
                                574                 : #define BTreeTupleGetNAtts(itup, rel)   \
                                575                 :     ( \
                                576                 :         (BTreeTupleIsPivot(itup)) ? \
                                577                 :         ( \
                                578                 :             ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_OFFSET_MASK \
                                579                 :         ) \
                                580                 :         : \
                                581                 :         IndexRelationGetNumberOfAttributes(rel) \
                                582                 :     )
                                583                 : 
                                584                 : /*
                                585                 :  * Set number of key attributes in tuple.
                                586                 :  *
                                587                 :  * The heap TID tiebreaker attribute bit may also be set here, indicating that
                                588                 :  * a heap TID value will be stored at the end of the tuple (i.e. using the
                                589                 :  * special pivot tuple representation).
                                590                 :  */
                                591                 : static inline void
 1097 pg                        592 GIC       96252 : BTreeTupleSetNAtts(IndexTuple itup, uint16 nkeyatts, bool heaptid)
                                593                 : {
 1097 pg                        594 CBC       96252 :     Assert(nkeyatts <= INDEX_MAX_KEYS);
 1082 pg                        595 GIC       96252 :     Assert((nkeyatts & BT_STATUS_OFFSET_MASK) == 0);
 1097 pg                        596 CBC       96252 :     Assert(!heaptid || nkeyatts > 0);
                                597           96252 :     Assert(!BTreeTupleIsPivot(itup) || nkeyatts == 0);
 1138 pg                        598 ECB             : 
 1138 pg                        599 CBC       96252 :     itup->t_info |= INDEX_ALT_TID_MASK;
                                600                 : 
 1097                           601           96252 :     if (heaptid)
 1097 pg                        602 GIC         516 :         nkeyatts |= BT_PIVOT_HEAP_TID_ATTR;
 1138 pg                        603 ECB             : 
 1097                           604                 :     /* BT_IS_POSTING bit is deliberately unset here */
 1097 pg                        605 GIC       96252 :     ItemPointerSetOffsetNumber(&itup->t_tid, nkeyatts);
                                606           96252 :     Assert(BTreeTupleIsPivot(itup));
 1138 pg                        607 CBC       96252 : }
 1138 pg                        608 ECB             : 
                                609                 : /*
                                610                 :  * Get/set leaf page's "top parent" link from its high key.  Used during page
                                611                 :  * deletion.
                                612                 :  *
                                613                 :  * Note: Cannot assert that tuple is a pivot tuple.  If we did so then
                                614                 :  * !heapkeyspace indexes would exhibit false positive assertion failures.
                                615                 :  */
                                616                 : static inline BlockNumber
 1138 pg                        617 GIC        2755 : BTreeTupleGetTopParent(IndexTuple leafhikey)
                                618                 : {
 1138 pg                        619 CBC        2755 :     return ItemPointerGetBlockNumberNoCheck(&leafhikey->t_tid);
                                620                 : }
 1138 pg                        621 ECB             : 
                                622                 : static inline void
 1138 pg                        623 GIC        3420 : BTreeTupleSetTopParent(IndexTuple leafhikey, BlockNumber blkno)
                                624                 : {
 1138 pg                        625 CBC        3420 :     ItemPointerSetBlockNumber(&leafhikey->t_tid, blkno);
 1097 pg                        626 GIC        3420 :     BTreeTupleSetNAtts(leafhikey, 0, false);
 1138 pg                        627 CBC        3420 : }
 1138 pg                        628 ECB             : 
                                629                 : /*
                                630                 :  * Get tiebreaker heap TID attribute, if any.
                                631                 :  *
                                632                 :  * This returns the first/lowest heap TID in the case of a posting list tuple.
                                633                 :  */
                                634                 : static inline ItemPointer
 1138 pg                        635 GIC    72430312 : BTreeTupleGetHeapTID(IndexTuple itup)
                                636                 : {
 1138 pg                        637 CBC    72430312 :     if (BTreeTupleIsPivot(itup))
                                638                 :     {
 1138 pg                        639 ECB             :         /* Pivot tuple heap TID representation? */
 1138 pg                        640 GIC    50192192 :         if ((ItemPointerGetOffsetNumberNoCheck(&itup->t_tid) &
                                641                 :              BT_PIVOT_HEAP_TID_ATTR) != 0)
 1138 pg                        642 CBC      243318 :             return (ItemPointer) ((char *) itup + IndexTupleSize(itup) -
                                643                 :                                   sizeof(ItemPointerData));
 1138 pg                        644 ECB             : 
                                645                 :         /* Heap TID attribute was truncated */
 1138 pg                        646 GIC    49948874 :         return NULL;
                                647                 :     }
 1138 pg                        648 CBC    22238120 :     else if (BTreeTupleIsPosting(itup))
 1138 pg                        649 GIC      989727 :         return BTreeTupleGetPosting(itup);
 1138 pg                        650 ECB             : 
 1138 pg                        651 CBC    21248393 :     return &itup->t_tid;
                                652                 : }
 1138 pg                        653 ECB             : 
                                654                 : /*
                                655                 :  * Get maximum heap TID attribute, which could be the only TID in the case of
                                656                 :  * a non-pivot tuple that does not have a posting list tuple.
                                657                 :  *
                                658                 :  * Works with non-pivot tuples only.
                                659                 :  */
                                660                 : static inline ItemPointer
 1138 pg                        661 GIC      170967 : BTreeTupleGetMaxHeapTID(IndexTuple itup)
                                662                 : {
 1138 pg                        663 CBC      170967 :     Assert(!BTreeTupleIsPivot(itup));
                                664                 : 
                                665          170967 :     if (BTreeTupleIsPosting(itup))
                                666                 :     {
                                667          170471 :         uint16      nposting = BTreeTupleGetNPosting(itup);
                                668                 : 
                                669          170471 :         return BTreeTupleGetPostingN(itup, nposting - 1);
                                670                 :     }
 1138 pg                        671 ECB             : 
 1138 pg                        672 GIC         496 :     return &itup->t_tid;
                                673                 : }
 1481 pg                        674 ECB             : 
                                675                 : /*
                                676                 :  *  Operator strategy numbers for B-tree have been moved to access/stratnum.h,
                                677                 :  *  because many places need to use them in ScanKeyInit() calls.
                                678                 :  *
                                679                 :  *  The strategy numbers are chosen so that we can commute them by
                                680                 :  *  subtraction, thus:
                                681                 :  */
                                682                 : #define BTCommuteStrategyNumber(strat)  (BTMaxStrategyNumber + 1 - (strat))
                                683                 : 
                                684                 : /*
                                685                 :  *  When a new operator class is declared, we require that the user
                                686                 :  *  supply us with an amproc procedure (BTORDER_PROC) for determining
                                687                 :  *  whether, for two keys a and b, a < b, a = b, or a > b.  This routine
                                688                 :  *  must return < 0, 0, > 0, respectively, in these three cases.
                                689                 :  *
                                690                 :  *  To facilitate accelerated sorting, an operator class may choose to
                                691                 :  *  offer a second procedure (BTSORTSUPPORT_PROC).  For full details, see
                                692                 :  *  src/include/utils/sortsupport.h.
                                693                 :  *
                                694                 :  *  To support window frames defined by "RANGE offset PRECEDING/FOLLOWING",
                                695                 :  *  an operator class may choose to offer a third amproc procedure
                                696                 :  *  (BTINRANGE_PROC), independently of whether it offers sortsupport.
                                697                 :  *  For full details, see doc/src/sgml/btree.sgml.
                                698                 :  *
                                699                 :  *  To facilitate B-Tree deduplication, an operator class may choose to
                                700                 :  *  offer a forth amproc procedure (BTEQUALIMAGE_PROC).  For full details,
                                701                 :  *  see doc/src/sgml/btree.sgml.
                                702                 :  */
                                703                 : 
                                704                 : #define BTORDER_PROC        1
                                705                 : #define BTSORTSUPPORT_PROC  2
                                706                 : #define BTINRANGE_PROC      3
                                707                 : #define BTEQUALIMAGE_PROC   4
                                708                 : #define BTOPTIONS_PROC      5
                                709                 : #define BTNProcs            5
                                710                 : 
                                711                 : /*
                                712                 :  *  We need to be able to tell the difference between read and write
                                713                 :  *  requests for pages, in order to do locking correctly.
                                714                 :  */
                                715                 : 
                                716                 : #define BT_READ         BUFFER_LOCK_SHARE
                                717                 : #define BT_WRITE        BUFFER_LOCK_EXCLUSIVE
                                718                 : 
                                719                 : /*
                                720                 :  * BTStackData -- As we descend a tree, we push the location of pivot
                                721                 :  * tuples whose downlink we are about to follow onto a private stack.  If
                                722                 :  * we split a leaf, we use this stack to walk back up the tree and insert
                                723                 :  * data into its parent page at the correct location.  We also have to
                                724                 :  * recursively insert into the grandparent page if and when the parent page
                                725                 :  * splits.  Our private stack can become stale due to concurrent page
                                726                 :  * splits and page deletions, but it should never give us an irredeemably
                                727                 :  * bad picture.
                                728                 :  */
                                729                 : typedef struct BTStackData
                                730                 : {
                                731                 :     BlockNumber bts_blkno;
                                732                 :     OffsetNumber bts_offset;
                                733                 :     struct BTStackData *bts_parent;
                                734                 : } BTStackData;
                                735                 : 
                                736                 : typedef BTStackData *BTStack;
                                737                 : 
                                738                 : /*
                                739                 :  * BTScanInsertData is the btree-private state needed to find an initial
                                740                 :  * position for an indexscan, or to insert new tuples -- an "insertion
                                741                 :  * scankey" (not to be confused with a search scankey).  It's used to descend
                                742                 :  * a B-Tree using _bt_search.
                                743                 :  *
                                744                 :  * heapkeyspace indicates if we expect all keys in the index to be physically
                                745                 :  * unique because heap TID is used as a tiebreaker attribute, and if index may
                                746                 :  * have truncated key attributes in pivot tuples.  This is actually a property
                                747                 :  * of the index relation itself (not an indexscan).  heapkeyspace indexes are
                                748                 :  * indexes whose version is >= version 4.  It's convenient to keep this close
                                749                 :  * by, rather than accessing the metapage repeatedly.
                                750                 :  *
                                751                 :  * allequalimage is set to indicate that deduplication is safe for the index.
                                752                 :  * This is also a property of the index relation rather than an indexscan.
                                753                 :  *
                                754                 :  * anynullkeys indicates if any of the keys had NULL value when scankey was
                                755                 :  * built from index tuple (note that already-truncated tuple key attributes
                                756                 :  * set NULL as a placeholder key value, which also affects value of
                                757                 :  * anynullkeys).  This is a convenience for unique index non-pivot tuple
                                758                 :  * insertion, which usually temporarily unsets scantid, but shouldn't iff
                                759                 :  * anynullkeys is true.  Value generally matches non-pivot tuple's HasNulls
                                760                 :  * bit, but may not when inserting into an INCLUDE index (tuple header value
                                761                 :  * is affected by the NULL-ness of both key and non-key attributes).
                                762                 :  *
                                763                 :  * When nextkey is false (the usual case), _bt_search and _bt_binsrch will
                                764                 :  * locate the first item >= scankey.  When nextkey is true, they will locate
                                765                 :  * the first item > scan key.
                                766                 :  *
                                767                 :  * pivotsearch is set to true by callers that want to re-find a leaf page
                                768                 :  * using a scankey built from a leaf page's high key.  Most callers set this
                                769                 :  * to false.
                                770                 :  *
                                771                 :  * scantid is the heap TID that is used as a final tiebreaker attribute.  It
                                772                 :  * is set to NULL when index scan doesn't need to find a position for a
                                773                 :  * specific physical tuple.  Must be set when inserting new tuples into
                                774                 :  * heapkeyspace indexes, since every tuple in the tree unambiguously belongs
                                775                 :  * in one exact position (it's never set with !heapkeyspace indexes, though).
                                776                 :  * Despite the representational difference, nbtree search code considers
                                777                 :  * scantid to be just another insertion scankey attribute.
                                778                 :  *
                                779                 :  * scankeys is an array of scan key entries for attributes that are compared
                                780                 :  * before scantid (user-visible attributes).  keysz is the size of the array.
                                781                 :  * During insertion, there must be a scan key for every attribute, but when
                                782                 :  * starting a regular index scan some can be omitted.  The array is used as a
                                783                 :  * flexible array member, though it's sized in a way that makes it possible to
                                784                 :  * use stack allocations.  See nbtree/README for full details.
                                785                 :  */
                                786                 : typedef struct BTScanInsertData
                                787                 : {
                                788                 :     bool        heapkeyspace;
                                789                 :     bool        allequalimage;
                                790                 :     bool        anynullkeys;
                                791                 :     bool        nextkey;
                                792                 :     bool        pivotsearch;
                                793                 :     ItemPointer scantid;        /* tiebreaker for scankeys */
                                794                 :     int         keysz;          /* Size of scankeys array */
                                795                 :     ScanKeyData scankeys[INDEX_MAX_KEYS];   /* Must appear last */
                                796                 : } BTScanInsertData;
                                797                 : 
                                798                 : typedef BTScanInsertData *BTScanInsert;
                                799                 : 
                                800                 : /*
                                801                 :  * BTInsertStateData is a working area used during insertion.
                                802                 :  *
                                803                 :  * This is filled in after descending the tree to the first leaf page the new
                                804                 :  * tuple might belong on.  Tracks the current position while performing
                                805                 :  * uniqueness check, before we have determined which exact page to insert
                                806                 :  * to.
                                807                 :  *
                                808                 :  * (This should be private to nbtinsert.c, but it's also used by
                                809                 :  * _bt_binsrch_insert)
                                810                 :  */
                                811                 : typedef struct BTInsertStateData
                                812                 : {
                                813                 :     IndexTuple  itup;           /* Item we're inserting */
                                814                 :     Size        itemsz;         /* Size of itup -- should be MAXALIGN()'d */
                                815                 :     BTScanInsert itup_key;      /* Insertion scankey */
                                816                 : 
                                817                 :     /* Buffer containing leaf page we're likely to insert itup on */
                                818                 :     Buffer      buf;
                                819                 : 
                                820                 :     /*
                                821                 :      * Cache of bounds within the current buffer.  Only used for insertions
                                822                 :      * where _bt_check_unique is called.  See _bt_binsrch_insert and
                                823                 :      * _bt_findinsertloc for details.
                                824                 :      */
                                825                 :     bool        bounds_valid;
                                826                 :     OffsetNumber low;
                                827                 :     OffsetNumber stricthigh;
                                828                 : 
                                829                 :     /*
                                830                 :      * if _bt_binsrch_insert found the location inside existing posting list,
                                831                 :      * save the position inside the list.  -1 sentinel value indicates overlap
                                832                 :      * with an existing posting list tuple that has its LP_DEAD bit set.
                                833                 :      */
                                834                 :     int         postingoff;
                                835                 : } BTInsertStateData;
                                836                 : 
                                837                 : typedef BTInsertStateData *BTInsertState;
                                838                 : 
                                839                 : /*
                                840                 :  * State used to representing an individual pending tuple during
                                841                 :  * deduplication.
                                842                 :  */
                                843                 : typedef struct BTDedupInterval
                                844                 : {
                                845                 :     OffsetNumber baseoff;
                                846                 :     uint16      nitems;
                                847                 : } BTDedupInterval;
                                848                 : 
                                849                 : /*
                                850                 :  * BTDedupStateData is a working area used during deduplication.
                                851                 :  *
                                852                 :  * The status info fields track the state of a whole-page deduplication pass.
                                853                 :  * State about the current pending posting list is also tracked.
                                854                 :  *
                                855                 :  * A pending posting list is comprised of a contiguous group of equal items
                                856                 :  * from the page, starting from page offset number 'baseoff'.  This is the
                                857                 :  * offset number of the "base" tuple for new posting list.  'nitems' is the
                                858                 :  * current total number of existing items from the page that will be merged to
                                859                 :  * make a new posting list tuple, including the base tuple item.  (Existing
                                860                 :  * items may themselves be posting list tuples, or regular non-pivot tuples.)
                                861                 :  *
                                862                 :  * The total size of the existing tuples to be freed when pending posting list
                                863                 :  * is processed gets tracked by 'phystupsize'.  This information allows
                                864                 :  * deduplication to calculate the space saving for each new posting list
                                865                 :  * tuple, and for the entire pass over the page as a whole.
                                866                 :  */
                                867                 : typedef struct BTDedupStateData
                                868                 : {
                                869                 :     /* Deduplication status info for entire pass over page */
                                870                 :     bool        deduplicate;    /* Still deduplicating page? */
                                871                 :     int         nmaxitems;      /* Number of max-sized tuples so far */
                                872                 :     Size        maxpostingsize; /* Limit on size of final tuple */
                                873                 : 
                                874                 :     /* Metadata about base tuple of current pending posting list */
                                875                 :     IndexTuple  base;           /* Use to form new posting list */
                                876                 :     OffsetNumber baseoff;       /* page offset of base */
                                877                 :     Size        basetupsize;    /* base size without original posting list */
                                878                 : 
                                879                 :     /* Other metadata about pending posting list */
                                880                 :     ItemPointer htids;          /* Heap TIDs in pending posting list */
                                881                 :     int         nhtids;         /* Number of heap TIDs in htids array */
                                882                 :     int         nitems;         /* Number of existing tuples/line pointers */
                                883                 :     Size        phystupsize;    /* Includes line pointer overhead */
                                884                 : 
                                885                 :     /*
                                886                 :      * Array of tuples to go on new version of the page.  Contains one entry
                                887                 :      * for each group of consecutive items.  Note that existing tuples that
                                888                 :      * will not become posting list tuples do not appear in the array (they
                                889                 :      * are implicitly unchanged by deduplication pass).
                                890                 :      */
                                891                 :     int         nintervals;     /* current number of intervals in array */
                                892                 :     BTDedupInterval intervals[MaxIndexTuplesPerPage];
                                893                 : } BTDedupStateData;
                                894                 : 
                                895                 : typedef BTDedupStateData *BTDedupState;
                                896                 : 
                                897                 : /*
                                898                 :  * BTVacuumPostingData is state that represents how to VACUUM (or delete) a
                                899                 :  * posting list tuple when some (though not all) of its TIDs are to be
                                900                 :  * deleted.
                                901                 :  *
                                902                 :  * Convention is that itup field is the original posting list tuple on input,
                                903                 :  * and palloc()'d final tuple used to overwrite existing tuple on output.
                                904                 :  */
                                905                 : typedef struct BTVacuumPostingData
                                906                 : {
                                907                 :     /* Tuple that will be/was updated */
                                908                 :     IndexTuple  itup;
                                909                 :     OffsetNumber updatedoffset;
                                910                 : 
                                911                 :     /* State needed to describe final itup in WAL */
                                912                 :     uint16      ndeletedtids;
                                913                 :     uint16      deletetids[FLEXIBLE_ARRAY_MEMBER];
                                914                 : } BTVacuumPostingData;
                                915                 : 
                                916                 : typedef BTVacuumPostingData *BTVacuumPosting;
                                917                 : 
                                918                 : /*
                                919                 :  * BTScanOpaqueData is the btree-private state needed for an indexscan.
                                920                 :  * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
                                921                 :  * details of the preprocessing), information about the current location
                                922                 :  * of the scan, and information about the marked location, if any.  (We use
                                923                 :  * BTScanPosData to represent the data needed for each of current and marked
                                924                 :  * locations.)  In addition we can remember some known-killed index entries
                                925                 :  * that must be marked before we can move off the current page.
                                926                 :  *
                                927                 :  * Index scans work a page at a time: we pin and read-lock the page, identify
                                928                 :  * all the matching items on the page and save them in BTScanPosData, then
                                929                 :  * release the read-lock while returning the items to the caller for
                                930                 :  * processing.  This approach minimizes lock/unlock traffic.  Note that we
                                931                 :  * keep the pin on the index page until the caller is done with all the items
                                932                 :  * (this is needed for VACUUM synchronization, see nbtree/README).  When we
                                933                 :  * are ready to step to the next page, if the caller has told us any of the
                                934                 :  * items were killed, we re-lock the page to mark them killed, then unlock.
                                935                 :  * Finally we drop the pin and step to the next page in the appropriate
                                936                 :  * direction.
                                937                 :  *
                                938                 :  * If we are doing an index-only scan, we save the entire IndexTuple for each
                                939                 :  * matched item, otherwise only its heap TID and offset.  The IndexTuples go
                                940                 :  * into a separate workspace array; each BTScanPosItem stores its tuple's
                                941                 :  * offset within that array.  Posting list tuples store a "base" tuple once,
                                942                 :  * allowing the same key to be returned for each TID in the posting list
                                943                 :  * tuple.
                                944                 :  */
                                945                 : 
                                946                 : typedef struct BTScanPosItem    /* what we remember about each match */
                                947                 : {
                                948                 :     ItemPointerData heapTid;    /* TID of referenced heap item */
                                949                 :     OffsetNumber indexOffset;   /* index item's location within page */
                                950                 :     LocationIndex tupleOffset;  /* IndexTuple's offset in workspace, if any */
                                951                 : } BTScanPosItem;
                                952                 : 
                                953                 : typedef struct BTScanPosData
                                954                 : {
                                955                 :     Buffer      buf;            /* if valid, the buffer is pinned */
                                956                 : 
                                957                 :     XLogRecPtr  lsn;            /* pos in the WAL stream when page was read */
                                958                 :     BlockNumber currPage;       /* page referenced by items array */
                                959                 :     BlockNumber nextPage;       /* page's right link when we scanned it */
                                960                 : 
                                961                 :     /*
                                962                 :      * moreLeft and moreRight track whether we think there may be matching
                                963                 :      * index entries to the left and right of the current page, respectively.
                                964                 :      * We can clear the appropriate one of these flags when _bt_checkkeys()
                                965                 :      * returns continuescan = false.
                                966                 :      */
                                967                 :     bool        moreLeft;
                                968                 :     bool        moreRight;
                                969                 : 
                                970                 :     /*
                                971                 :      * If we are doing an index-only scan, nextTupleOffset is the first free
                                972                 :      * location in the associated tuple storage workspace.
                                973                 :      */
                                974                 :     int         nextTupleOffset;
                                975                 : 
                                976                 :     /*
                                977                 :      * The items array is always ordered in index order (ie, increasing
                                978                 :      * indexoffset).  When scanning backwards it is convenient to fill the
                                979                 :      * array back-to-front, so we start at the last slot and fill downwards.
                                980                 :      * Hence we need both a first-valid-entry and a last-valid-entry counter.
                                981                 :      * itemIndex is a cursor showing which entry was last returned to caller.
                                982                 :      */
                                983                 :     int         firstItem;      /* first valid index in items[] */
                                984                 :     int         lastItem;       /* last valid index in items[] */
                                985                 :     int         itemIndex;      /* current index in items[] */
                                986                 : 
                                987                 :     BTScanPosItem items[MaxTIDsPerBTreePage];   /* MUST BE LAST */
                                988                 : } BTScanPosData;
                                989                 : 
                                990                 : typedef BTScanPosData *BTScanPos;
                                991                 : 
                                992                 : #define BTScanPosIsPinned(scanpos) \
                                993                 : ( \
                                994                 :     AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
                                995                 :                 !BufferIsValid((scanpos).buf)), \
                                996                 :     BufferIsValid((scanpos).buf) \
                                997                 : )
                                998                 : #define BTScanPosUnpin(scanpos) \
                                999                 :     do { \
                               1000                 :         ReleaseBuffer((scanpos).buf); \
                               1001                 :         (scanpos).buf = InvalidBuffer; \
                               1002                 :     } while (0)
                               1003                 : #define BTScanPosUnpinIfPinned(scanpos) \
                               1004                 :     do { \
                               1005                 :         if (BTScanPosIsPinned(scanpos)) \
                               1006                 :             BTScanPosUnpin(scanpos); \
                               1007                 :     } while (0)
                               1008                 : 
                               1009                 : #define BTScanPosIsValid(scanpos) \
                               1010                 : ( \
                               1011                 :     AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
                               1012                 :                 !BufferIsValid((scanpos).buf)), \
                               1013                 :     BlockNumberIsValid((scanpos).currPage) \
                               1014                 : )
                               1015                 : #define BTScanPosInvalidate(scanpos) \
                               1016                 :     do { \
                               1017                 :         (scanpos).currPage = InvalidBlockNumber; \
                               1018                 :         (scanpos).nextPage = InvalidBlockNumber; \
                               1019                 :         (scanpos).buf = InvalidBuffer; \
                               1020                 :         (scanpos).lsn = InvalidXLogRecPtr; \
                               1021                 :         (scanpos).nextTupleOffset = 0; \
                               1022                 :     } while (0)
                               1023                 : 
                               1024                 : /* We need one of these for each equality-type SK_SEARCHARRAY scan key */
                               1025                 : typedef struct BTArrayKeyInfo
                               1026                 : {
                               1027                 :     int         scan_key;       /* index of associated key in arrayKeyData */
                               1028                 :     int         cur_elem;       /* index of current element in elem_values */
                               1029                 :     int         mark_elem;      /* index of marked element in elem_values */
                               1030                 :     int         num_elems;      /* number of elems in current array value */
                               1031                 :     Datum      *elem_values;    /* array of num_elems Datums */
                               1032                 : } BTArrayKeyInfo;
                               1033                 : 
                               1034                 : typedef struct BTScanOpaqueData
                               1035                 : {
                               1036                 :     /* these fields are set by _bt_preprocess_keys(): */
                               1037                 :     bool        qual_ok;        /* false if qual can never be satisfied */
                               1038                 :     int         numberOfKeys;   /* number of preprocessed scan keys */
                               1039                 :     ScanKey     keyData;        /* array of preprocessed scan keys */
                               1040                 : 
                               1041                 :     /* workspace for SK_SEARCHARRAY support */
                               1042                 :     ScanKey     arrayKeyData;   /* modified copy of scan->keyData */
                               1043                 :     int         numArrayKeys;   /* number of equality-type array keys (-1 if
                               1044                 :                                  * there are any unsatisfiable array keys) */
                               1045                 :     int         arrayKeyCount;  /* count indicating number of array scan keys
                               1046                 :                                  * processed */
                               1047                 :     BTArrayKeyInfo *arrayKeys;  /* info about each equality-type array key */
                               1048                 :     MemoryContext arrayContext; /* scan-lifespan context for array data */
                               1049                 : 
                               1050                 :     /* info about killed items if any (killedItems is NULL if never used) */
                               1051                 :     int        *killedItems;    /* currPos.items indexes of killed items */
                               1052                 :     int         numKilled;      /* number of currently stored items */
                               1053                 : 
                               1054                 :     /*
                               1055                 :      * If we are doing an index-only scan, these are the tuple storage
                               1056                 :      * workspaces for the currPos and markPos respectively.  Each is of size
                               1057                 :      * BLCKSZ, so it can hold as much as a full page's worth of tuples.
                               1058                 :      */
                               1059                 :     char       *currTuples;     /* tuple storage for currPos */
                               1060                 :     char       *markTuples;     /* tuple storage for markPos */
                               1061                 : 
                               1062                 :     /*
                               1063                 :      * If the marked position is on the same page as current position, we
                               1064                 :      * don't use markPos, but just keep the marked itemIndex in markItemIndex
                               1065                 :      * (all the rest of currPos is valid for the mark position). Hence, to
                               1066                 :      * determine if there is a mark, first look at markItemIndex, then at
                               1067                 :      * markPos.
                               1068                 :      */
                               1069                 :     int         markItemIndex;  /* itemIndex, or -1 if not valid */
                               1070                 : 
                               1071                 :     /* keep these last in struct for efficiency */
                               1072                 :     BTScanPosData currPos;      /* current position data */
                               1073                 :     BTScanPosData markPos;      /* marked position, if any */
                               1074                 : } BTScanOpaqueData;
                               1075                 : 
                               1076                 : typedef BTScanOpaqueData *BTScanOpaque;
                               1077                 : 
                               1078                 : /*
                               1079                 :  * We use some private sk_flags bits in preprocessed scan keys.  We're allowed
                               1080                 :  * to use bits 16-31 (see skey.h).  The uppermost bits are copied from the
                               1081                 :  * index's indoption[] array entry for the index attribute.
                               1082                 :  */
                               1083                 : #define SK_BT_REQFWD    0x00010000  /* required to continue forward scan */
                               1084                 : #define SK_BT_REQBKWD   0x00020000  /* required to continue backward scan */
                               1085                 : #define SK_BT_INDOPTION_SHIFT  24   /* must clear the above bits */
                               1086                 : #define SK_BT_DESC          (INDOPTION_DESC << SK_BT_INDOPTION_SHIFT)
                               1087                 : #define SK_BT_NULLS_FIRST   (INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT)
                               1088                 : 
                               1089                 : typedef struct BTOptions
                               1090                 : {
                               1091                 :     int32       varlena_header_;    /* varlena header (do not touch directly!) */
                               1092                 :     int         fillfactor;     /* page fill factor in percent (0..100) */
                               1093                 :     float8      vacuum_cleanup_index_scale_factor;  /* deprecated */
                               1094                 :     bool        deduplicate_items;  /* Try to deduplicate items? */
                               1095                 : } BTOptions;
                               1096                 : 
                               1097                 : #define BTGetFillFactor(relation) \
                               1098                 :     (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \
                               1099                 :                  relation->rd_rel->relam == BTREE_AM_OID), \
                               1100                 :      (relation)->rd_options ? \
                               1101                 :      ((BTOptions *) (relation)->rd_options)->fillfactor : \
                               1102                 :      BTREE_DEFAULT_FILLFACTOR)
                               1103                 : #define BTGetTargetPageFreeSpace(relation) \
                               1104                 :     (BLCKSZ * (100 - BTGetFillFactor(relation)) / 100)
                               1105                 : #define BTGetDeduplicateItems(relation) \
                               1106                 :     (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \
                               1107                 :                  relation->rd_rel->relam == BTREE_AM_OID), \
                               1108                 :     ((relation)->rd_options ? \
                               1109                 :      ((BTOptions *) (relation)->rd_options)->deduplicate_items : true))
                               1110                 : 
                               1111                 : /*
                               1112                 :  * Constant definition for progress reporting.  Phase numbers must match
                               1113                 :  * btbuildphasename.
                               1114                 :  */
                               1115                 : /* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */
                               1116                 : #define PROGRESS_BTREE_PHASE_INDEXBUILD_TABLESCAN       2
                               1117                 : #define PROGRESS_BTREE_PHASE_PERFORMSORT_1              3
                               1118                 : #define PROGRESS_BTREE_PHASE_PERFORMSORT_2              4
                               1119                 : #define PROGRESS_BTREE_PHASE_LEAF_LOAD                  5
                               1120                 : 
                               1121                 : /*
                               1122                 :  * external entry points for btree, in nbtree.c
                               1123                 :  */
                               1124                 : extern void btbuildempty(Relation index);
                               1125                 : extern bool btinsert(Relation rel, Datum *values, bool *isnull,
                               1126                 :                      ItemPointer ht_ctid, Relation heapRel,
                               1127                 :                      IndexUniqueCheck checkUnique,
                               1128                 :                      bool indexUnchanged,
                               1129                 :                      struct IndexInfo *indexInfo);
                               1130                 : extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys);
                               1131                 : extern Size btestimateparallelscan(void);
                               1132                 : extern void btinitparallelscan(void *target);
                               1133                 : extern bool btgettuple(IndexScanDesc scan, ScanDirection dir);
                               1134                 : extern int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
                               1135                 : extern void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
                               1136                 :                      ScanKey orderbys, int norderbys);
                               1137                 : extern void btparallelrescan(IndexScanDesc scan);
                               1138                 : extern void btendscan(IndexScanDesc scan);
                               1139                 : extern void btmarkpos(IndexScanDesc scan);
                               1140                 : extern void btrestrpos(IndexScanDesc scan);
                               1141                 : extern IndexBulkDeleteResult *btbulkdelete(IndexVacuumInfo *info,
                               1142                 :                                            IndexBulkDeleteResult *stats,
                               1143                 :                                            IndexBulkDeleteCallback callback,
                               1144                 :                                            void *callback_state);
                               1145                 : extern IndexBulkDeleteResult *btvacuumcleanup(IndexVacuumInfo *info,
                               1146                 :                                               IndexBulkDeleteResult *stats);
                               1147                 : extern bool btcanreturn(Relation index, int attno);
                               1148                 : 
                               1149                 : /*
                               1150                 :  * prototypes for internal functions in nbtree.c
                               1151                 :  */
                               1152                 : extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno);
                               1153                 : extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page);
                               1154                 : extern void _bt_parallel_done(IndexScanDesc scan);
                               1155                 : extern void _bt_parallel_advance_array_keys(IndexScanDesc scan);
                               1156                 : 
                               1157                 : /*
                               1158                 :  * prototypes for functions in nbtdedup.c
                               1159                 :  */
                               1160                 : extern void _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel,
                               1161                 :                            IndexTuple newitem, Size newitemsz,
                               1162                 :                            bool bottomupdedup);
                               1163                 : extern bool _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel,
                               1164                 :                                  Size newitemsz);
                               1165                 : extern void _bt_dedup_start_pending(BTDedupState state, IndexTuple base,
                               1166                 :                                     OffsetNumber baseoff);
                               1167                 : extern bool _bt_dedup_save_htid(BTDedupState state, IndexTuple itup);
                               1168                 : extern Size _bt_dedup_finish_pending(Page newpage, BTDedupState state);
                               1169                 : extern IndexTuple _bt_form_posting(IndexTuple base, ItemPointer htids,
                               1170                 :                                    int nhtids);
                               1171                 : extern void _bt_update_posting(BTVacuumPosting vacposting);
                               1172                 : extern IndexTuple _bt_swap_posting(IndexTuple newitem, IndexTuple oposting,
                               1173                 :                                    int postingoff);
                               1174                 : 
                               1175                 : /*
                               1176                 :  * prototypes for functions in nbtinsert.c
                               1177                 :  */
                               1178                 : extern bool _bt_doinsert(Relation rel, IndexTuple itup,
                               1179                 :                          IndexUniqueCheck checkUnique, bool indexUnchanged,
                               1180                 :                          Relation heapRel);
                               1181                 : extern void _bt_finish_split(Relation rel, Relation heaprel, Buffer lbuf,
                               1182                 :                              BTStack stack);
                               1183                 : extern Buffer _bt_getstackbuf(Relation rel, Relation heaprel, BTStack stack,
                               1184                 :                               BlockNumber child);
                               1185                 : 
                               1186                 : /*
                               1187                 :  * prototypes for functions in nbtsplitloc.c
                               1188                 :  */
                               1189                 : extern OffsetNumber _bt_findsplitloc(Relation rel, Page origpage,
                               1190                 :                                      OffsetNumber newitemoff, Size newitemsz, IndexTuple newitem,
                               1191                 :                                      bool *newitemonleft);
                               1192                 : 
                               1193                 : /*
                               1194                 :  * prototypes for functions in nbtpage.c
                               1195                 :  */
                               1196                 : extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level,
                               1197                 :                              bool allequalimage);
                               1198                 : extern bool _bt_vacuum_needs_cleanup(Relation rel, Relation heaprel);
                               1199                 : extern void _bt_set_cleanup_info(Relation rel, Relation heaprel,
                               1200                 :                                  BlockNumber num_delpages);
                               1201                 : extern void _bt_upgrademetapage(Page page);
                               1202                 : extern Buffer _bt_getroot(Relation rel, Relation heaprel, int access);
                               1203                 : extern Buffer _bt_gettrueroot(Relation rel, Relation heaprel);
                               1204                 : extern int  _bt_getrootheight(Relation rel, Relation heaprel);
                               1205                 : extern void _bt_metaversion(Relation rel, Relation heaprel, bool *heapkeyspace,
                               1206                 :                             bool *allequalimage);
                               1207                 : extern void _bt_checkpage(Relation rel, Buffer buf);
                               1208                 : extern Buffer _bt_getbuf(Relation rel, Relation heaprel, BlockNumber blkno,
                               1209                 :                          int access);
                               1210                 : extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
                               1211                 :                                BlockNumber blkno, int access);
                               1212                 : extern void _bt_relbuf(Relation rel, Buffer buf);
                               1213                 : extern void _bt_lockbuf(Relation rel, Buffer buf, int access);
                               1214                 : extern void _bt_unlockbuf(Relation rel, Buffer buf);
                               1215                 : extern bool _bt_conditionallockbuf(Relation rel, Buffer buf);
                               1216                 : extern void _bt_upgradelockbufcleanup(Relation rel, Buffer buf);
                               1217                 : extern void _bt_pageinit(Page page, Size size);
                               1218                 : extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
                               1219                 :                                 OffsetNumber *deletable, int ndeletable,
                               1220                 :                                 BTVacuumPosting *updatable, int nupdatable);
                               1221                 : extern void _bt_delitems_delete_check(Relation rel, Buffer buf,
                               1222                 :                                       Relation heapRel,
                               1223                 :                                       TM_IndexDeleteOp *delstate);
                               1224                 : extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate);
                               1225                 : extern void _bt_pendingfsm_init(Relation rel, BTVacState *vstate,
                               1226                 :                                 bool cleanuponly);
                               1227                 : extern void _bt_pendingfsm_finalize(Relation rel, BTVacState *vstate);
                               1228                 : 
                               1229                 : /*
                               1230                 :  * prototypes for functions in nbtsearch.c
                               1231                 :  */
                               1232                 : extern BTStack _bt_search(Relation rel, Relation heaprel, BTScanInsert key,
                               1233                 :                           Buffer *bufP, int access, Snapshot snapshot);
                               1234                 : extern Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
                               1235                 :                             Buffer buf, bool forupdate, BTStack stack,
                               1236                 :                             int access, Snapshot snapshot);
                               1237                 : extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate);
                               1238                 : extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum);
                               1239                 : extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
                               1240                 : extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
                               1241                 : extern Buffer _bt_get_endpoint(Relation rel, Relation heaprel, uint32 level,
                               1242                 :                                bool rightmost, Snapshot snapshot);
                               1243                 : 
                               1244                 : /*
                               1245                 :  * prototypes for functions in nbtutils.c
                               1246                 :  */
                               1247                 : extern BTScanInsert _bt_mkscankey(Relation rel, Relation heaprel, IndexTuple itup);
                               1248                 : extern void _bt_freestack(BTStack stack);
                               1249                 : extern void _bt_preprocess_array_keys(IndexScanDesc scan);
                               1250                 : extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
                               1251                 : extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir);
                               1252                 : extern void _bt_mark_array_keys(IndexScanDesc scan);
                               1253                 : extern void _bt_restore_array_keys(IndexScanDesc scan);
                               1254                 : extern void _bt_preprocess_keys(IndexScanDesc scan);
                               1255                 : extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
                               1256                 :                           int tupnatts, ScanDirection dir, bool *continuescan);
                               1257                 : extern void _bt_killitems(IndexScanDesc scan);
                               1258                 : extern BTCycleId _bt_vacuum_cycleid(Relation rel);
                               1259                 : extern BTCycleId _bt_start_vacuum(Relation rel);
                               1260                 : extern void _bt_end_vacuum(Relation rel);
                               1261                 : extern void _bt_end_vacuum_callback(int code, Datum arg);
                               1262                 : extern Size BTreeShmemSize(void);
                               1263                 : extern void BTreeShmemInit(void);
                               1264                 : extern bytea *btoptions(Datum reloptions, bool validate);
                               1265                 : extern bool btproperty(Oid index_oid, int attno,
                               1266                 :                        IndexAMProperty prop, const char *propname,
                               1267                 :                        bool *res, bool *isnull);
                               1268                 : extern char *btbuildphasename(int64 phasenum);
                               1269                 : extern IndexTuple _bt_truncate(Relation rel, IndexTuple lastleft,
                               1270                 :                                IndexTuple firstright, BTScanInsert itup_key);
                               1271                 : extern int  _bt_keep_natts_fast(Relation rel, IndexTuple lastleft,
                               1272                 :                                 IndexTuple firstright);
                               1273                 : extern bool _bt_check_natts(Relation rel, bool heapkeyspace, Page page,
                               1274                 :                             OffsetNumber offnum);
                               1275                 : extern void _bt_check_third_page(Relation rel, Relation heap,
                               1276                 :                                  bool needheaptidspace, Page page, IndexTuple newtup);
                               1277                 : extern bool _bt_allequalimage(Relation rel, bool debugmessage);
                               1278                 : 
                               1279                 : /*
                               1280                 :  * prototypes for functions in nbtvalidate.c
                               1281                 :  */
                               1282                 : extern bool btvalidate(Oid opclassoid);
                               1283                 : extern void btadjustmembers(Oid opfamilyoid,
                               1284                 :                             Oid opclassoid,
                               1285                 :                             List *operators,
                               1286                 :                             List *functions);
                               1287                 : 
                               1288                 : /*
                               1289                 :  * prototypes for functions in nbtsort.c
                               1290                 :  */
                               1291                 : extern IndexBuildResult *btbuild(Relation heap, Relation index,
                               1292                 :                                  struct IndexInfo *indexInfo);
                               1293                 : extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc);
                               1294                 : 
                               1295                 : #endif                          /* NBTREE_H */
        

Generated by: LCOV version v1.16-55-g56c0a2a