LCOV - differential code coverage report
Current view: top level - src/backend/access/gist - gist.c (source / functions) Coverage Total Hit UNC UIC UBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 71.3 % 578 412 1 4 161 20 4 388 4 21 1 3
Current Date: 2023-04-08 17:13:01 Functions: 76.5 % 17 13 1 3 4 1 8 4
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 3 3 3
Legend: Lines: hit not hit (120,180] days: 0.0 % 1 0 1
(180,240] days: 100.0 % 1 1 1
(240..) days: 71.2 % 573 408 4 161 20 388 3 21
Function coverage date bins:
(240..) days: 61.9 % 21 13 1 3 4 1 8 4

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * gist.c
                                  4                 :  *    interface routines for the postgres GiST index access method.
                                  5                 :  *
                                  6                 :  *
                                  7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  8                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/access/gist/gist.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : #include "postgres.h"
                                 16                 : 
                                 17                 : #include "access/gist_private.h"
                                 18                 : #include "access/gistscan.h"
                                 19                 : #include "access/xloginsert.h"
                                 20                 : #include "catalog/pg_collation.h"
                                 21                 : #include "commands/vacuum.h"
                                 22                 : #include "miscadmin.h"
                                 23                 : #include "nodes/execnodes.h"
                                 24                 : #include "storage/lmgr.h"
                                 25                 : #include "storage/predicate.h"
                                 26                 : #include "utils/builtins.h"
                                 27                 : #include "utils/index_selfuncs.h"
                                 28                 : #include "utils/memutils.h"
                                 29                 : #include "utils/rel.h"
                                 30                 : 
                                 31                 : /* non-export function prototypes */
                                 32                 : static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
                                 33                 : static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
                                 34                 :                             GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
                                 35                 : static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
                                 36                 :                              GISTSTATE *giststate,
                                 37                 :                              IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
                                 38                 :                              Buffer leftchild, Buffer rightchild,
                                 39                 :                              bool unlockbuf, bool unlockleftchild);
                                 40                 : static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
                                 41                 :                             GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
                                 42                 : static void gistprunepage(Relation rel, Page page, Buffer buffer,
                                 43                 :                           Relation heapRel);
                                 44                 : 
                                 45                 : 
                                 46                 : #define ROTATEDIST(d) do { \
                                 47                 :     SplitedPageLayout *tmp=(SplitedPageLayout*)palloc0(sizeof(SplitedPageLayout)); \
                                 48                 :     tmp->block.blkno = InvalidBlockNumber;   \
                                 49                 :     tmp->buffer = InvalidBuffer; \
                                 50                 :     tmp->next = (d); \
                                 51                 :     (d)=tmp; \
                                 52                 : } while(0)
                                 53                 : 
                                 54                 : 
                                 55                 : /*
                                 56                 :  * GiST handler function: return IndexAmRoutine with access method parameters
                                 57                 :  * and callbacks.
                                 58                 :  */
                                 59                 : Datum
 2639 tgl                        60 CBC        2179 : gisthandler(PG_FUNCTION_ARGS)
                                 61                 : {
                                 62            2179 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
                                 63                 : 
                                 64            2179 :     amroutine->amstrategies = 0;
 2537 teodor                     65            2179 :     amroutine->amsupport = GISTNProcs;
 1105 akorotkov                  66            2179 :     amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
 2639 tgl                        67            2179 :     amroutine->amcanorder = false;
                                 68            2179 :     amroutine->amcanorderbyop = true;
                                 69            2179 :     amroutine->amcanbackward = false;
                                 70            2179 :     amroutine->amcanunique = false;
                                 71            2179 :     amroutine->amcanmulticol = true;
                                 72            2179 :     amroutine->amoptionalkey = true;
                                 73            2179 :     amroutine->amsearcharray = false;
                                 74            2179 :     amroutine->amsearchnulls = true;
                                 75            2179 :     amroutine->amstorage = true;
                                 76            2179 :     amroutine->amclusterable = true;
 1839 teodor                     77            2179 :     amroutine->ampredlocks = true;
 2244 rhaas                      78            2179 :     amroutine->amcanparallel = false;
 1491 akorotkov                  79            2179 :     amroutine->amcaninclude = true;
 1180 akapila                    80            2179 :     amroutine->amusemaintenanceworkmem = false;
   20 tomas.vondra               81 GNC        2179 :     amroutine->amsummarizing = false;
 1180 akapila                    82 CBC        2179 :     amroutine->amparallelvacuumoptions =
 1180 akapila                    83 ECB             :         VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
 2639 tgl                        84 GIC        2179 :     amroutine->amkeytype = InvalidOid;
 2639 tgl                        85 ECB             : 
 2639 tgl                        86 GIC        2179 :     amroutine->ambuild = gistbuild;
 2639 tgl                        87 CBC        2179 :     amroutine->ambuildempty = gistbuildempty;
                                 88            2179 :     amroutine->aminsert = gistinsert;
                                 89            2179 :     amroutine->ambulkdelete = gistbulkdelete;
                                 90            2179 :     amroutine->amvacuumcleanup = gistvacuumcleanup;
                                 91            2179 :     amroutine->amcanreturn = gistcanreturn;
                                 92            2179 :     amroutine->amcostestimate = gistcostestimate;
                                 93            2179 :     amroutine->amoptions = gistoptions;
 2430                            94            2179 :     amroutine->amproperty = gistproperty;
 1468 alvherre                   95            2179 :     amroutine->ambuildphasename = NULL;
 2639 tgl                        96            2179 :     amroutine->amvalidate = gistvalidate;
  981                            97            2179 :     amroutine->amadjustmembers = gistadjustmembers;
 2639                            98            2179 :     amroutine->ambeginscan = gistbeginscan;
                                 99            2179 :     amroutine->amrescan = gistrescan;
                                100            2179 :     amroutine->amgettuple = gistgettuple;
                                101            2179 :     amroutine->amgetbitmap = gistgetbitmap;
                                102            2179 :     amroutine->amendscan = gistendscan;
                                103            2179 :     amroutine->ammarkpos = NULL;
                                104            2179 :     amroutine->amrestrpos = NULL;
 2266 rhaas                     105            2179 :     amroutine->amestimateparallelscan = NULL;
                                106            2179 :     amroutine->aminitparallelscan = NULL;
                                107            2179 :     amroutine->amparallelrescan = NULL;
 2639 tgl                       108 ECB             : 
 2639 tgl                       109 GIC        2179 :     PG_RETURN_POINTER(amroutine);
 2639 tgl                       110 ECB             : }
                                111                 : 
                                112                 : /*
                                113                 :  * Create and return a temporary memory context for use by GiST. We
                                114                 :  * _always_ invoke user-provided methods in a temporary memory
                                115                 :  * context, so that memory leaks in those functions cannot cause
                                116                 :  * problems. Also, we use some additional temporary contexts in the
                                117                 :  * GiST code itself, to avoid the need to do some awkward manual
                                118                 :  * memory management.
                                119                 :  */
                                120                 : MemoryContext
 6408 bruce                     121 GIC        1929 : createTempGistContext(void)
 6408 bruce                     122 ECB             : {
 6408 bruce                     123 GIC        1929 :     return AllocSetContextCreate(CurrentMemoryContext,
 6408 bruce                     124 ECB             :                                  "GiST temporary context",
                                125                 :                                  ALLOCSET_DEFAULT_SIZES);
                                126                 : }
                                127                 : 
                                128                 : /*
                                129                 :  *  gistbuildempty() -- build an empty gist index in the initialization fork
                                130                 :  */
                                131                 : void
 2639 tgl                       132 GIC           3 : gistbuildempty(Relation index)
 4484 rhaas                     133 ECB             : {
                                134                 :     Buffer      buffer;
                                135                 : 
                                136                 :     /* Initialize the root page */
    4 andres                    137 GNC           3 :     buffer = ExtendBufferedRel(EB_REL(index), INIT_FORKNUM, NULL,
                                138                 :                                EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);
                                139                 : 
                                140                 :     /* Initialize and xlog buffer */
 3709 heikki.linnakangas        141 GIC           3 :     START_CRIT_SECTION();
 3709 heikki.linnakangas        142 CBC           3 :     GISTInitBuffer(buffer, F_LEAF);
                                143               3 :     MarkBufferDirty(buffer);
 3413                           144               3 :     log_newpage_buffer(buffer, true);
 3709                           145               3 :     END_CRIT_SECTION();
 3709 heikki.linnakangas        146 ECB             : 
                                147                 :     /* Unlock and release the buffer */
 3709 heikki.linnakangas        148 GIC           3 :     UnlockReleaseBuffer(buffer);
 4484 rhaas                     149 CBC           3 : }
 4484 rhaas                     150 ECB             : 
                                151                 : /*
                                152                 :  *  gistinsert -- wrapper for GiST tuple insertion.
                                153                 :  *
                                154                 :  *    This is the public interface routine for tuple insertion in GiSTs.
                                155                 :  *    It doesn't do any work; just locks the relation and passes the buck.
                                156                 :  */
                                157                 : bool
 2639 tgl                       158 GIC      151307 : gistinsert(Relation r, Datum *values, bool *isnull,
 2639 tgl                       159 ECB             :            ItemPointer ht_ctid, Relation heapRel,
                                160                 :            IndexUniqueCheck checkUnique,
                                161                 :            bool indexUnchanged,
                                162                 :            IndexInfo *indexInfo)
                                163                 : {
 2250 tgl                       164 GIC      151307 :     GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
 9344 bruce                     165 ECB             :     IndexTuple  itup;
                                166                 :     MemoryContext oldCxt;
                                167                 : 
                                168                 :     /* Initialize GISTSTATE cache if first call in this statement */
 2250 tgl                       169 GIC      151307 :     if (giststate == NULL)
 2250 tgl                       170 ECB             :     {
 2250 tgl                       171 GIC         237 :         oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
 2250 tgl                       172 CBC         237 :         giststate = initGISTstate(r);
                                173             237 :         giststate->tempCxt = createTempGistContext();
                                174             237 :         indexInfo->ii_AmCache = (void *) giststate;
                                175             237 :         MemoryContextSwitchTo(oldCxt);
 2250 tgl                       176 ECB             :     }
                                177                 : 
 4209 tgl                       178 GIC      151307 :     oldCxt = MemoryContextSwitchTo(giststate->tempCxt);
 9345 bruce                     179 ECB             : 
 4209 tgl                       180 GIC      151307 :     itup = gistFormTuple(giststate, r,
 6031 bruce                     181 ECB             :                          values, isnull, true /* size is currently bogus */ );
 9345 bruce                     182 GIC      151307 :     itup->t_tid = *ht_ctid;
 9345 bruce                     183 ECB             : 
 1467 heikki.linnakangas        184 GIC      151307 :     gistdoinsert(r, itup, 0, giststate, heapRel, false);
 8881 vadim4o                   185 ECB             : 
                                186                 :     /* cleanup */
 4209 tgl                       187 GIC      151301 :     MemoryContextSwitchTo(oldCxt);
 2250 tgl                       188 CBC      151301 :     MemoryContextReset(giststate->tempCxt);
 9345 bruce                     189 ECB             : 
 2639 tgl                       190 GIC      151301 :     return false;
 9722 scrappy                   191 ECB             : }
                                192                 : 
                                193                 : 
                                194                 : /*
                                195                 :  * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
                                196                 :  * at that offset is atomically removed along with inserting the new tuples.
                                197                 :  * This is used to replace a tuple with a new one.
                                198                 :  *
                                199                 :  * If 'leftchildbuf' is valid, we're inserting the downlink for the page
                                200                 :  * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
                                201                 :  * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
                                202                 :  *
                                203                 :  * If 'markfollowright' is true and the page is split, the left child is
                                204                 :  * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
                                205                 :  * index build, however, there is no concurrent access and the page splitting
                                206                 :  * is done in a slightly simpler fashion, and false is passed.
                                207                 :  *
                                208                 :  * If there is not enough room on the page, it is split. All the split
                                209                 :  * pages are kept pinned and locked and returned in *splitinfo, the caller
                                210                 :  * is responsible for inserting the downlinks for them. However, if
                                211                 :  * 'buffer' is the root page and it needs to be split, gistplacetopage()
                                212                 :  * performs the split as one atomic operation, and *splitinfo is set to NIL.
                                213                 :  * In that case, we continue to hold the root page locked, and the child
                                214                 :  * pages are released; note that new tuple(s) are *not* on the root page
                                215                 :  * but in one of the new child pages.
                                216                 :  *
                                217                 :  * If 'newblkno' is not NULL, returns the block number of page the first
                                218                 :  * new/updated tuple was inserted to. Usually it's the given page, but could
                                219                 :  * be its right sibling if the page was split.
                                220                 :  *
                                221                 :  * Returns 'true' if the page was split, 'false' otherwise.
                                222                 :  */
                                223                 : bool
 4231 heikki.linnakangas        224 GIC      847063 : gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
 4490 heikki.linnakangas        225 ECB             :                 Buffer buffer,
                                226                 :                 IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
                                227                 :                 BlockNumber *newblkno,
                                228                 :                 Buffer leftchildbuf,
                                229                 :                 List **splitinfo,
                                230                 :                 bool markfollowright,
                                231                 :                 Relation heapRel,
                                232                 :                 bool is_build)
                                233                 : {
 3888 heikki.linnakangas        234 GIC      847063 :     BlockNumber blkno = BufferGetBlockNumber(buffer);
 2545 kgrittn                   235 CBC      847063 :     Page        page = BufferGetPage(buffer);
 4490 heikki.linnakangas        236          847063 :     bool        is_leaf = (GistPageIsLeaf(page)) ? true : false;
 4490 heikki.linnakangas        237 ECB             :     XLogRecPtr  recptr;
                                238                 :     bool        is_split;
                                239                 : 
                                240                 :     /*
                                241                 :      * Refuse to modify a page that's incompletely split. This should not
                                242                 :      * happen because we finish any incomplete splits while we walk down the
                                243                 :      * tree. However, it's remotely possible that another concurrent inserter
                                244                 :      * splits a parent page, and errors out before completing the split. We
                                245                 :      * will just throw an error in that case, and leave any split we had in
                                246                 :      * progress unfinished too. The next insert that comes along will clean up
                                247                 :      * the mess.
                                248                 :      */
 4490 heikki.linnakangas        249 CBC      847063 :     if (GistFollowRight(page))
 4490 heikki.linnakangas        250 UBC           0 :         elog(ERROR, "concurrent GiST page split was incomplete");
                                251                 : 
                                252                 :     /* should never try to insert to a deleted page */
  809 heikki.linnakangas        253 CBC      847063 :     Assert(!GistPageIsDeleted(page));
                                254                 : 
 4490                           255          847063 :     *splitinfo = NIL;
                                256                 : 
                                257                 :     /*
                                258                 :      * if isupdate, remove old key: This node's key has been modified, either
                                259                 :      * because a child split occurred or because we needed to adjust our key
                                260                 :      * for an insert in a child node. Therefore, remove the old version of
                                261                 :      * this node's key.
                                262                 :      *
                                263                 :      * for WAL replay, in the non-split case we handle this by setting up a
                                264                 :      * one-element todelete array; in the split case, it's handled implicitly
                                265                 :      * because the tuple vector passed to gistSplit won't include this tuple.
                                266                 :      */
 4231                           267          847063 :     is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
                                268                 : 
                                269                 :     /*
                                270                 :      * If leaf page is full, try at first to delete dead tuples. And then
                                271                 :      * check again.
                                272                 :      */
 2769 teodor                    273          847063 :     if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
                                274                 :     {
 1496 heikki.linnakangas        275 UBC           0 :         gistprunepage(rel, page, buffer, heapRel);
 2769 teodor                    276               0 :         is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
                                277                 :     }
                                278                 : 
 4490 heikki.linnakangas        279 CBC      847063 :     if (is_split)
                                280                 :     {
                                281                 :         /* no space for insertion */
                                282                 :         IndexTuple *itvec;
                                283                 :         int         tlen;
 6408 bruce                     284           12298 :         SplitedPageLayout *dist = NULL,
                                285                 :                    *ptr;
 4490 heikki.linnakangas        286           12298 :         BlockNumber oldrlink = InvalidBlockNumber;
 3941                           287           12298 :         GistNSN     oldnsn = 0;
                                288                 :         SplitedPageLayout rootpg;
                                289                 :         bool        is_rootsplit;
                                290                 :         int         npage;
                                291                 : 
 4490                           292           12298 :         is_rootsplit = (blkno == GIST_ROOT_BLKNO);
                                293                 : 
                                294                 :         /*
                                295                 :          * Form index tuples vector to split. If we're replacing an old tuple,
                                296                 :          * remove the old version from the vector.
                                297                 :          */
                                298           12298 :         itvec = gistextractpage(page, &tlen);
                                299           12298 :         if (OffsetNumberIsValid(oldoffnum))
                                300                 :         {
                                301                 :             /* on inner page we should remove old tuple */
                                302            2391 :             int         pos = oldoffnum - FirstOffsetNumber;
                                303                 : 
 6031 bruce                     304            2391 :             tlen--;
                                305            2391 :             if (pos != tlen)
                                306            1516 :                 memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
                                307                 :         }
 4490 heikki.linnakangas        308           12298 :         itvec = gistjoinvector(itvec, &tlen, itup, ntup);
 4231                           309           12298 :         dist = gistSplit(rel, page, itvec, tlen, giststate);
                                310                 : 
                                311                 :         /*
                                312                 :          * Check that split didn't produce too many pages.
                                313                 :          */
 3293                           314           12298 :         npage = 0;
                                315           36921 :         for (ptr = dist; ptr; ptr = ptr->next)
                                316           24623 :             npage++;
                                317                 :         /* in a root split, we'll add one more page to the list below */
                                318           12298 :         if (is_rootsplit)
                                319             192 :             npage++;
                                320           12298 :         if (npage > GIST_MAX_SPLIT_PAGES)
 3293 heikki.linnakangas        321 UBC           0 :             elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
                                322                 :                  npage, GIST_MAX_SPLIT_PAGES);
                                323                 : 
                                324                 :         /*
                                325                 :          * Set up pages to work with. Allocate new buffers for all but the
                                326                 :          * leftmost page. The original page becomes the new leftmost page, and
                                327                 :          * is just replaced with the new contents.
                                328                 :          *
                                329                 :          * For a root-split, allocate new buffers for all child pages, the
                                330                 :          * original page is overwritten with new root page containing
                                331                 :          * downlinks to the new child pages.
                                332                 :          */
 4490 heikki.linnakangas        333 CBC       12298 :         ptr = dist;
                                334           12298 :         if (!is_rootsplit)
                                335                 :         {
                                336                 :             /* save old rightlink and NSN */
                                337           12106 :             oldrlink = GistPageGetOpaque(page)->rightlink;
 3734                           338           12106 :             oldnsn = GistPageGetNSN(page);
                                339                 : 
 4490                           340           12106 :             dist->buffer = buffer;
                                341           12106 :             dist->block.blkno = BufferGetBlockNumber(buffer);
 2545 kgrittn                   342           12106 :             dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
                                343                 : 
                                344                 :             /* clean all flags except F_LEAF */
 6178 teodor                    345           12106 :             GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
                                346                 : 
 4490 heikki.linnakangas        347           12106 :             ptr = ptr->next;
                                348                 :         }
                                349           24815 :         for (; ptr; ptr = ptr->next)
                                350                 :         {
                                351                 :             /* Allocate new page */
    8 andres                    352 GNC       12517 :             ptr->buffer = gistNewBuffer(rel, heapRel);
 4490 heikki.linnakangas        353 CBC       12517 :             GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
 2545 kgrittn                   354           12517 :             ptr->page = BufferGetPage(ptr->buffer);
 4490 heikki.linnakangas        355           12517 :             ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
 1839 teodor                    356           12517 :             PredicateLockPageSplit(rel,
                                357                 :                                    BufferGetBlockNumber(buffer),
                                358                 :                                    BufferGetBlockNumber(ptr->buffer));
                                359                 :         }
                                360                 : 
                                361                 :         /*
                                362                 :          * Now that we know which blocks the new pages go to, set up downlink
                                363                 :          * tuples to point to them.
                                364                 :          */
 6031 bruce                     365           36921 :         for (ptr = dist; ptr; ptr = ptr->next)
                                366                 :         {
 4490 heikki.linnakangas        367           24623 :             ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
                                368           24623 :             GistTupleSetValid(ptr->itup);
                                369                 :         }
                                370                 : 
                                371                 :         /*
                                372                 :          * If this is a root split, we construct the new root page with the
                                373                 :          * downlinks here directly, instead of requiring the caller to insert
                                374                 :          * them. Add the new root page to the list along with the child pages.
                                375                 :          */
                                376           12298 :         if (is_rootsplit)
                                377                 :         {
                                378                 :             IndexTuple *downlinks;
 4382 bruce                     379             192 :             int         ndownlinks = 0;
                                380                 :             int         i;
                                381                 : 
 4490 heikki.linnakangas        382             192 :             rootpg.buffer = buffer;
 2545 kgrittn                   383             192 :             rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
 4490 heikki.linnakangas        384             192 :             GistPageGetOpaque(rootpg.page)->flags = 0;
                                385                 : 
                                386                 :             /* Prepare a vector of all the downlinks */
                                387             579 :             for (ptr = dist; ptr; ptr = ptr->next)
                                388             387 :                 ndownlinks++;
                                389             192 :             downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
                                390             579 :             for (i = 0, ptr = dist; ptr; ptr = ptr->next)
                                391             387 :                 downlinks[i++] = ptr->itup;
                                392                 : 
                                393             192 :             rootpg.block.blkno = GIST_ROOT_BLKNO;
                                394             192 :             rootpg.block.num = ndownlinks;
                                395             192 :             rootpg.list = gistfillitupvec(downlinks, ndownlinks,
                                396                 :                                           &(rootpg.lenlist));
                                397             192 :             rootpg.itup = NULL;
                                398                 : 
                                399             192 :             rootpg.next = dist;
                                400             192 :             dist = &rootpg;
                                401                 :         }
                                402                 :         else
                                403                 :         {
                                404                 :             /* Prepare split-info to be returned to caller */
                                405           36342 :             for (ptr = dist; ptr; ptr = ptr->next)
                                406                 :             {
                                407           24236 :                 GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
                                408                 : 
                                409           24236 :                 si->buf = ptr->buffer;
                                410           24236 :                 si->downlink = ptr->itup;
                                411           24236 :                 *splitinfo = lappend(*splitinfo, si);
                                412                 :             }
                                413                 :         }
                                414                 : 
                                415                 :         /*
                                416                 :          * Fill all pages. All the pages are new, ie. freshly allocated empty
                                417                 :          * pages, or a temporary copy of the old page.
                                418                 :          */
                                419           37113 :         for (ptr = dist; ptr; ptr = ptr->next)
                                420                 :         {
 4382 bruce                     421           24815 :             char       *data = (char *) (ptr->list);
                                422                 : 
  228 drowley                   423 GNC      923771 :             for (int i = 0; i < ptr->block.num; i++)
                                424                 :             {
 3888 heikki.linnakangas        425 CBC      898956 :                 IndexTuple  thistup = (IndexTuple) data;
                                426                 : 
                                427          898956 :                 if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
 4231 heikki.linnakangas        428 UBC           0 :                     elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
                                429                 : 
                                430                 :                 /*
                                431                 :                  * If this is the first inserted/updated tuple, let the caller
                                432                 :                  * know which page it landed on.
                                433                 :                  */
 3888 heikki.linnakangas        434 CBC      898956 :                 if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
                                435             387 :                     *newblkno = ptr->block.blkno;
                                436                 : 
                                437          898956 :                 data += IndexTupleSize(thistup);
                                438                 :             }
                                439                 : 
                                440                 :             /* Set up rightlinks */
 4490                           441           24815 :             if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
                                442           24650 :                 GistPageGetOpaque(ptr->page)->rightlink =
                                443           12325 :                     ptr->next->block.blkno;
                                444                 :             else
                                445           12490 :                 GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
                                446                 : 
                                447                 :             /*
                                448                 :              * Mark the all but the right-most page with the follow-right
                                449                 :              * flag. It will be cleared as soon as the downlink is inserted
                                450                 :              * into the parent, but this ensures that if we error out before
                                451                 :              * that, the index is still consistent. (in buffering build mode,
                                452                 :              * any error will abort the index build anyway, so this is not
                                453                 :              * needed.)
                                454                 :              */
 4231                           455           24815 :             if (ptr->next && !is_rootsplit && markfollowright)
 4490                           456           11746 :                 GistMarkFollowRight(ptr->page);
                                457                 :             else
                                458           13069 :                 GistClearFollowRight(ptr->page);
                                459                 : 
                                460                 :             /*
                                461                 :              * Copy the NSN of the original page to all pages. The
                                462                 :              * F_FOLLOW_RIGHT flags ensure that scans will follow the
                                463                 :              * rightlinks until the downlinks are inserted.
                                464                 :              */
 3734                           465           24815 :             GistPageSetNSN(ptr->page, oldnsn);
                                466                 :         }
                                467                 : 
                                468                 :         /*
                                469                 :          * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
                                470                 :          * insertion for that. NB: The number of pages and data segments
                                471                 :          * specified here must match the calculations in gistXLogSplit()!
                                472                 :          */
 1467                           473           12298 :         if (!is_build && RelationNeedsWAL(rel))
 3062                           474            1730 :             XLogEnsureRecordSpace(npage, 1 + npage * 2);
                                475                 : 
 6178 teodor                    476           12298 :         START_CRIT_SECTION();
                                477                 : 
                                478                 :         /*
                                479                 :          * Must mark buffers dirty before XLogInsert, even though we'll still
                                480                 :          * be changing their opaque fields below.
                                481                 :          */
 6031 bruce                     482           37113 :         for (ptr = dist; ptr; ptr = ptr->next)
 6218 tgl                       483           24815 :             MarkBufferDirty(ptr->buffer);
 4490 heikki.linnakangas        484           12298 :         if (BufferIsValid(leftchildbuf))
                                485            2364 :             MarkBufferDirty(leftchildbuf);
                                486                 : 
                                487                 :         /*
                                488                 :          * The first page in the chain was a temporary working copy meant to
                                489                 :          * replace the old page. Copy it over the old page.
                                490                 :          */
 2545 kgrittn                   491           12298 :         PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
                                492           12298 :         dist->page = BufferGetPage(dist->buffer);
                                493                 : 
                                494                 :         /*
                                495                 :          * Write the WAL record.
                                496                 :          *
                                497                 :          * If we're building a new index, however, we don't WAL-log changes
                                498                 :          * yet. The LSN-NSN interlock between parent and child requires that
                                499                 :          * LSNs never move backwards, so set the LSNs to a value that's
                                500                 :          * smaller than any real or fake unlogged LSN that might be generated
                                501                 :          * later. (There can't be any concurrent scans during index build, so
                                502                 :          * we don't need to be able to detect concurrent splits yet.)
                                503                 :          */
 1467 heikki.linnakangas        504           12298 :         if (is_build)
                                505           10565 :             recptr = GistBuildLSN;
                                506                 :         else
                                507                 :         {
                                508            1733 :             if (RelationNeedsWAL(rel))
                                509            1730 :                 recptr = gistXLogSplit(is_leaf,
                                510                 :                                        dist, oldrlink, oldnsn, leftchildbuf,
                                511                 :                                        markfollowright);
                                512                 :             else
                                513               3 :                 recptr = gistGetFakeLSN(rel);
                                514                 :         }
                                515                 : 
 6031 bruce                     516           37113 :         for (ptr = dist; ptr; ptr = ptr->next)
 4490 heikki.linnakangas        517           24815 :             PageSetLSN(ptr->page, recptr);
                                518                 : 
                                519                 :         /*
                                520                 :          * Return the new child buffers to the caller.
                                521                 :          *
                                522                 :          * If this was a root split, we've already inserted the downlink
                                523                 :          * pointers, in the form of a new root page. Therefore we can release
                                524                 :          * all the new buffers, and keep just the root page locked.
                                525                 :          */
                                526           12298 :         if (is_rootsplit)
                                527                 :         {
                                528             579 :             for (ptr = dist->next; ptr; ptr = ptr->next)
                                529             387 :                 UnlockReleaseBuffer(ptr->buffer);
                                530                 :         }
                                531                 :     }
                                532                 :     else
                                533                 :     {
                                534                 :         /*
                                535                 :          * Enough space.  We always get here if ntup==0.
                                536                 :          */
 6178 teodor                    537          834765 :         START_CRIT_SECTION();
                                538                 : 
                                539                 :         /*
                                540                 :          * Delete old tuple if any, then insert new tuple(s) if any.  If
                                541                 :          * possible, use the fast path of PageIndexTupleOverwrite.
                                542                 :          */
 4490 heikki.linnakangas        543          834765 :         if (OffsetNumberIsValid(oldoffnum))
                                544                 :         {
 2403 tgl                       545          371373 :             if (ntup == 1)
                                546                 :             {
                                547                 :                 /* One-for-one replacement, so use PageIndexTupleOverwrite */
                                548          361640 :                 if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
                                549          361640 :                                              IndexTupleSize(*itup)))
 2403 tgl                       550 UBC           0 :                     elog(ERROR, "failed to add item to index page in \"%s\"",
                                551                 :                          RelationGetRelationName(rel));
                                552                 :             }
                                553                 :             else
                                554                 :             {
                                555                 :                 /* Delete old, then append new tuple(s) to page */
 2403 tgl                       556 CBC        9733 :                 PageIndexTupleDelete(page, oldoffnum);
                                557            9733 :                 gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
                                558                 :             }
                                559                 :         }
                                560                 :         else
                                561                 :         {
                                562                 :             /* Just append new tuples at the end of the page */
                                563          463392 :             gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
                                564                 :         }
                                565                 : 
 4490 heikki.linnakangas        566          834765 :         MarkBufferDirty(buffer);
                                567                 : 
                                568          834765 :         if (BufferIsValid(leftchildbuf))
                                569            9382 :             MarkBufferDirty(leftchildbuf);
                                570                 : 
 1467                           571          834765 :         if (is_build)
                                572          588316 :             recptr = GistBuildLSN;
                                573                 :         else
                                574                 :         {
                                575          246449 :             if (RelationNeedsWAL(rel))
 6408 bruce                     576          246410 :             {
 1467 heikki.linnakangas        577          246410 :                 OffsetNumber ndeloffs = 0,
                                578                 :                             deloffs[1];
                                579                 : 
                                580          246410 :                 if (OffsetNumberIsValid(oldoffnum))
                                581                 :                 {
                                582           96869 :                     deloffs[0] = oldoffnum;
                                583           96869 :                     ndeloffs = 1;
                                584                 :                 }
                                585                 : 
                                586          246410 :                 recptr = gistXLogUpdate(buffer,
                                587                 :                                         deloffs, ndeloffs, itup, ntup,
                                588                 :                                         leftchildbuf);
                                589                 :             }
                                590                 :             else
                                591              39 :                 recptr = gistGetFakeLSN(rel);
                                592                 :         }
                                593          834765 :         PageSetLSN(page, recptr);
                                594                 : 
 3888                           595          834765 :         if (newblkno)
                                596           52185 :             *newblkno = blkno;
                                597                 :     }
                                598                 : 
                                599                 :     /*
                                600                 :      * If we inserted the downlink for a child page, set NSN and clear
                                601                 :      * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
                                602                 :      * follow the rightlink if and only if they looked at the parent page
                                603                 :      * before we inserted the downlink.
                                604                 :      *
                                605                 :      * Note that we do this *after* writing the WAL record. That means that
                                606                 :      * the possible full page image in the WAL record does not include these
                                607                 :      * changes, and they must be replayed even if the page is restored from
                                608                 :      * the full page image. There's a chicken-and-egg problem: if we updated
                                609                 :      * the child pages first, we wouldn't know the recptr of the WAL record
                                610                 :      * we're about to write.
                                611                 :      */
 4490                           612          847063 :     if (BufferIsValid(leftchildbuf))
                                613                 :     {
 2545 kgrittn                   614           11746 :         Page        leftpg = BufferGetPage(leftchildbuf);
                                615                 : 
 3734 heikki.linnakangas        616           11746 :         GistPageSetNSN(leftpg, recptr);
 4490                           617           11746 :         GistClearFollowRight(leftpg);
                                618                 : 
                                619           11746 :         PageSetLSN(leftpg, recptr);
                                620                 :     }
                                621                 : 
                                622          847063 :     END_CRIT_SECTION();
                                623                 : 
                                624          847063 :     return is_split;
                                625                 : }
                                626                 : 
                                627                 : /*
                                628                 :  * Workhouse routine for doing insertion into a GiST index. Note that
                                629                 :  * this routine assumes it is invoked in a short-lived memory context,
                                630                 :  * so it does not bother releasing palloc'd allocations.
                                631                 :  */
                                632                 : void
 1570 akorotkov                 633          455566 : gistdoinsert(Relation r, IndexTuple itup, Size freespace,
                                634                 :              GISTSTATE *giststate, Relation heapRel, bool is_build)
                                635                 : {
                                636                 :     ItemId      iid;
                                637                 :     IndexTuple  idxtuple;
                                638                 :     GISTInsertStack firststack;
                                639                 :     GISTInsertStack *stack;
                                640                 :     GISTInsertState state;
 4490 heikki.linnakangas        641          455566 :     bool        xlocked = false;
                                642                 : 
                                643          455566 :     memset(&state, 0, sizeof(GISTInsertState));
                                644          455566 :     state.freespace = freespace;
                                645          455566 :     state.r = r;
 1570 akorotkov                 646          455566 :     state.heapRel = heapRel;
 1467 heikki.linnakangas        647          455566 :     state.is_build = is_build;
                                648                 : 
                                649                 :     /* Start from the root */
 4490                           650          455566 :     firststack.blkno = GIST_ROOT_BLKNO;
 3941                           651          455566 :     firststack.lsn = 0;
 1426                           652          455566 :     firststack.retry_from_parent = false;
 4490                           653          455566 :     firststack.parent = NULL;
 4286                           654          455566 :     firststack.downlinkoffnum = InvalidOffsetNumber;
 4490                           655          455566 :     state.stack = stack = &firststack;
                                656                 : 
                                657                 :     /*
                                658                 :      * Walk down along the path of smallest penalty, updating the parent
                                659                 :      * pointers with the key we're inserting as we go. If we crash in the
                                660                 :      * middle, the tree is consistent, although the possible parent updates
                                661                 :      * were a waste.
                                662                 :      */
                                663                 :     for (;;)
                                664                 :     {
                                665                 :         /*
                                666                 :          * If we split an internal page while descending the tree, we have to
                                667                 :          * retry at the parent. (Normally, the LSN-NSN interlock below would
                                668                 :          * also catch this and cause us to retry. But LSNs are not updated
                                669                 :          * during index build.)
                                670                 :          */
 1426                           671         1007742 :         while (stack->retry_from_parent)
                                672                 :         {
 1426 heikki.linnakangas        673 UBC           0 :             if (xlocked)
                                674               0 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
                                675               0 :             xlocked = false;
                                676               0 :             ReleaseBuffer(stack->buffer);
                                677               0 :             state.stack = stack = stack->parent;
                                678                 :         }
                                679                 : 
 4490 heikki.linnakangas        680 CBC     1007742 :         if (XLogRecPtrIsInvalid(stack->lsn))
                                681         1007724 :             stack->buffer = ReadBuffer(state.r, stack->blkno);
                                682                 : 
                                683                 :         /*
                                684                 :          * Be optimistic and grab shared lock first. Swap it for an exclusive
                                685                 :          * lock later if we need to update the page.
                                686                 :          */
                                687         1007742 :         if (!xlocked)
                                688                 :         {
                                689         1007742 :             LockBuffer(stack->buffer, GIST_SHARE);
                                690         1007742 :             gistcheckpage(state.r, stack->buffer);
                                691                 :         }
                                692                 : 
 2545 kgrittn                   693         1007742 :         stack->page = (Page) BufferGetPage(stack->buffer);
 1916 alvherre                  694         1007742 :         stack->lsn = xlocked ?
                                695         1007742 :             PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
 4490 heikki.linnakangas        696         1007742 :         Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));
                                697                 : 
                                698                 :         /*
                                699                 :          * If this page was split but the downlink was never inserted to the
                                700                 :          * parent because the inserting backend crashed before doing that, fix
                                701                 :          * that now.
                                702                 :          */
                                703         1007742 :         if (GistFollowRight(stack->page))
                                704                 :         {
 4490 heikki.linnakangas        705 UBC           0 :             if (!xlocked)
                                706                 :             {
                                707               0 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
                                708               0 :                 LockBuffer(stack->buffer, GIST_EXCLUSIVE);
                                709               0 :                 xlocked = true;
                                710                 :                 /* someone might've completed the split when we unlocked */
                                711               0 :                 if (!GistFollowRight(stack->page))
                                712               0 :                     continue;
                                713                 :             }
                                714               0 :             gistfixsplit(&state, giststate);
                                715                 : 
                                716               0 :             UnlockReleaseBuffer(stack->buffer);
                                717               0 :             xlocked = false;
                                718               0 :             state.stack = stack = stack->parent;
                                719               0 :             continue;
                                720                 :         }
                                721                 : 
 1355 heikki.linnakangas        722 CBC     1559900 :         if ((stack->blkno != GIST_ROOT_BLKNO &&
                                723          552158 :              stack->parent->lsn < GistPageGetNSN(stack->page)) ||
                                724         1007742 :             GistPageIsDeleted(stack->page))
                                725                 :         {
                                726                 :             /*
                                727                 :              * Concurrent split or page deletion detected. There's no
                                728                 :              * guarantee that the downlink for this page is consistent with
                                729                 :              * the tuple we're inserting anymore, so go back to parent and
                                730                 :              * rechoose the best child.
                                731                 :              */
 4490 heikki.linnakangas        732 UBC           0 :             UnlockReleaseBuffer(stack->buffer);
                                733               0 :             xlocked = false;
                                734               0 :             state.stack = stack = stack->parent;
 6495 teodor                    735               0 :             continue;
                                736                 :         }
                                737                 : 
 4490 heikki.linnakangas        738 CBC     1007742 :         if (!GistPageIsLeaf(stack->page))
                                739                 :         {
                                740                 :             /*
                                741                 :              * This is an internal page so continue to walk down the tree.
                                742                 :              * Find the child node that has the minimum insertion penalty.
                                743                 :              */
                                744                 :             BlockNumber childblkno;
                                745                 :             IndexTuple  newtup;
                                746                 :             GISTInsertStack *item;
                                747                 :             OffsetNumber downlinkoffnum;
                                748                 : 
 4286                           749          552176 :             downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
                                750          552176 :             iid = PageGetItemId(stack->page, downlinkoffnum);
 4490                           751          552176 :             idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
                                752          552176 :             childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
                                753                 : 
                                754                 :             /*
                                755                 :              * Check that it's not a leftover invalid tuple from pre-9.1
                                756                 :              */
                                757          552176 :             if (GistTupleIsInvalid(idxtuple))
 4490 heikki.linnakangas        758 UBC           0 :                 ereport(ERROR,
                                759                 :                         (errmsg("index \"%s\" contains an inner tuple marked as invalid",
                                760                 :                                 RelationGetRelationName(r)),
                                761                 :                          errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
                                762                 :                          errhint("Please REINDEX it.")));
                                763                 : 
                                764                 :             /*
                                765                 :              * Check that the key representing the target child node is
                                766                 :              * consistent with the key we're inserting. Update it if it's not.
                                767                 :              */
 4490 heikki.linnakangas        768 CBC      552176 :             newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
                                769          552176 :             if (newtup)
                                770                 :             {
                                771                 :                 /*
                                772                 :                  * Swap shared lock for an exclusive one. Beware, the page may
                                773                 :                  * change while we unlock/lock the page...
                                774                 :                  */
                                775          327185 :                 if (!xlocked)
                                776                 :                 {
                                777          327185 :                     LockBuffer(stack->buffer, GIST_UNLOCK);
                                778          327185 :                     LockBuffer(stack->buffer, GIST_EXCLUSIVE);
                                779          327185 :                     xlocked = true;
 2545 kgrittn                   780          327185 :                     stack->page = (Page) BufferGetPage(stack->buffer);
                                781                 : 
 3754 alvherre                  782          327185 :                     if (PageGetLSN(stack->page) != stack->lsn)
                                783                 :                     {
                                784                 :                         /* the page was changed while we unlocked it, retry */
 4490 heikki.linnakangas        785 UBC           0 :                         continue;
                                786                 :                     }
                                787                 :                 }
                                788                 : 
                                789                 :                 /*
                                790                 :                  * Update the tuple.
                                791                 :                  *
                                792                 :                  * We still hold the lock after gistinserttuple(), but it
                                793                 :                  * might have to split the page to make the updated tuple fit.
                                794                 :                  * In that case the updated tuple might migrate to the other
                                795                 :                  * half of the split, so we have to go back to the parent and
                                796                 :                  * descend back to the half that's a better fit for the new
                                797                 :                  * tuple.
                                798                 :                  */
 3985 heikki.linnakangas        799 CBC      327185 :                 if (gistinserttuple(&state, stack, giststate, newtup,
                                800                 :                                     downlinkoffnum))
                                801                 :                 {
                                802                 :                     /*
                                803                 :                      * If this was a root split, the root page continues to be
                                804                 :                      * the parent and the updated tuple went to one of the
                                805                 :                      * child pages, so we just need to retry from the root
                                806                 :                      * page.
                                807                 :                      */
 4473                           808              18 :                     if (stack->blkno != GIST_ROOT_BLKNO)
                                809                 :                     {
                                810              18 :                         UnlockReleaseBuffer(stack->buffer);
                                811              18 :                         xlocked = false;
                                812              18 :                         state.stack = stack = stack->parent;
                                813                 :                     }
 4490                           814              18 :                     continue;
                                815                 :                 }
                                816                 :             }
                                817          552158 :             LockBuffer(stack->buffer, GIST_UNLOCK);
                                818          552158 :             xlocked = false;
                                819                 : 
                                820                 :             /* descend to the chosen child */
                                821          552158 :             item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
                                822          552158 :             item->blkno = childblkno;
                                823          552158 :             item->parent = stack;
 4286                           824          552158 :             item->downlinkoffnum = downlinkoffnum;
 4490                           825          552158 :             state.stack = stack = item;
                                826                 :         }
                                827                 :         else
                                828                 :         {
                                829                 :             /*
                                830                 :              * Leaf page. Insert the new key. We've already updated all the
                                831                 :              * parents on the way down, but we might have to split the page if
                                832                 :              * it doesn't fit. gistinserttuple() will take care of that.
                                833                 :              */
                                834                 : 
                                835                 :             /*
                                836                 :              * Swap shared lock for an exclusive one. Be careful, the page may
                                837                 :              * change while we unlock/lock the page...
                                838                 :              */
                                839          455566 :             if (!xlocked)
                                840                 :             {
                                841          455566 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
                                842          455566 :                 LockBuffer(stack->buffer, GIST_EXCLUSIVE);
                                843          455566 :                 xlocked = true;
 2545 kgrittn                   844          455566 :                 stack->page = (Page) BufferGetPage(stack->buffer);
 4490 heikki.linnakangas        845          455566 :                 stack->lsn = PageGetLSN(stack->page);
                                846                 : 
                                847          455566 :                 if (stack->blkno == GIST_ROOT_BLKNO)
                                848                 :                 {
                                849                 :                     /*
                                850                 :                      * the only page that can become inner instead of leaf is
                                851                 :                      * the root page, so for root we should recheck it
                                852                 :                      */
                                853           25879 :                     if (!GistPageIsLeaf(stack->page))
                                854                 :                     {
                                855                 :                         /*
                                856                 :                          * very rare situation: during unlock/lock index with
                                857                 :                          * number of pages = 1 was increased
                                858                 :                          */
 4490 heikki.linnakangas        859 UBC           0 :                         LockBuffer(stack->buffer, GIST_UNLOCK);
                                860               0 :                         xlocked = false;
                                861               0 :                         continue;
                                862                 :                     }
                                863                 : 
                                864                 :                     /*
                                865                 :                      * we don't need to check root split, because checking
                                866                 :                      * leaf/inner is enough to recognize split for root
                                867                 :                      */
                                868                 :                 }
 1355 heikki.linnakangas        869 CBC      859374 :                 else if ((GistFollowRight(stack->page) ||
  809                           870          429687 :                           stack->parent->lsn < GistPageGetNSN(stack->page)) ||
 1355                           871          429687 :                          GistPageIsDeleted(stack->page))
                                872                 :                 {
                                873                 :                     /*
                                874                 :                      * The page was split or deleted while we momentarily
                                875                 :                      * unlocked the page. Go back to parent.
                                876                 :                      */
 4490 heikki.linnakangas        877 UBC           0 :                     UnlockReleaseBuffer(stack->buffer);
                                878               0 :                     xlocked = false;
                                879               0 :                     state.stack = stack = stack->parent;
                                880               0 :                     continue;
                                881                 :                 }
                                882                 :             }
                                883                 : 
                                884                 :             /* now state.stack->(page, buffer and blkno) points to leaf page */
                                885                 : 
 3985 heikki.linnakangas        886 CBC      455566 :             gistinserttuple(&state, stack, giststate, itup,
                                887                 :                             InvalidOffsetNumber);
 4490                           888          455560 :             LockBuffer(stack->buffer, GIST_UNLOCK);
                                889                 : 
                                890                 :             /* Release any pins we might still hold before exiting */
                                891         1463254 :             for (; stack; stack = stack->parent)
                                892         1007694 :                 ReleaseBuffer(stack->buffer);
 6508 teodor                    893          455560 :             break;
                                894                 :         }
                                895                 :     }
 6495                           896          455560 : }
                                897                 : 
                                898                 : /*
                                899                 :  * Traverse the tree to find path from root page to specified "child" block.
                                900                 :  *
                                901                 :  * returns a new insertion stack, starting from the parent of "child", up
                                902                 :  * to the root. *downlinkoffnum is set to the offset of the downlink in the
                                903                 :  * direct parent of child.
                                904                 :  *
                                905                 :  * To prevent deadlocks, this should lock only one page at a time.
                                906                 :  */
                                907                 : static GISTInsertStack *
 4286 heikki.linnakangas        908 UBC           0 : gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
                                909                 : {
                                910                 :     Page        page;
                                911                 :     Buffer      buffer;
                                912                 :     OffsetNumber i,
                                913                 :                 maxoff;
                                914                 :     ItemId      iid;
                                915                 :     IndexTuple  idxtuple;
                                916                 :     List       *fifo;
                                917                 :     GISTInsertStack *top,
                                918                 :                *ptr;
                                919                 :     BlockNumber blkno;
                                920                 : 
                                921               0 :     top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
 6495 teodor                    922               0 :     top->blkno = GIST_ROOT_BLKNO;
 4286 heikki.linnakangas        923               0 :     top->downlinkoffnum = InvalidOffsetNumber;
                                924                 : 
                                925               0 :     fifo = list_make1(top);
                                926               0 :     while (fifo != NIL)
                                927                 :     {
                                928                 :         /* Get next page to visit */
                                929               0 :         top = linitial(fifo);
                                930               0 :         fifo = list_delete_first(fifo);
                                931                 : 
 6219 tgl                       932               0 :         buffer = ReadBuffer(r, top->blkno);
                                933               0 :         LockBuffer(buffer, GIST_SHARE);
 6363                           934               0 :         gistcheckpage(r, buffer);
 2545 kgrittn                   935               0 :         page = (Page) BufferGetPage(buffer);
                                936                 : 
 6408 bruce                     937               0 :         if (GistPageIsLeaf(page))
                                938                 :         {
                                939                 :             /*
                                940                 :              * Because we scan the index top-down, all the rest of the pages
                                941                 :              * in the queue must be leaf pages as well.
                                942                 :              */
 6218 tgl                       943               0 :             UnlockReleaseBuffer(buffer);
 4286 heikki.linnakangas        944               0 :             break;
                                945                 :         }
                                946                 : 
                                947                 :         /* currently, internal pages are never deleted */
 1355                           948               0 :         Assert(!GistPageIsDeleted(page));
                                949                 : 
 1916 alvherre                  950               0 :         top->lsn = BufferGetLSNAtomic(buffer);
                                951                 : 
                                952                 :         /*
                                953                 :          * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
                                954                 :          * downlink. This should not normally happen..
                                955                 :          */
 4490 heikki.linnakangas        956               0 :         if (GistFollowRight(page))
                                957               0 :             elog(ERROR, "concurrent GiST page split was incomplete");
                                958                 : 
 3734                           959               0 :         if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
 6408 bruce                     960               0 :             GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
                                961                 :         {
                                962                 :             /*
                                963                 :              * Page was split while we looked elsewhere. We didn't see the
                                964                 :              * downlink to the right page when we scanned the parent, so add
                                965                 :              * it to the queue now.
                                966                 :              *
                                967                 :              * Put the right page ahead of the queue, so that we visit it
                                968                 :              * next. That's important, because if this is the lowest internal
                                969                 :              * level, just above leaves, we might already have queued up some
                                970                 :              * leaf pages, and we assume that there can't be any non-leaf
                                971                 :              * pages behind leaf pages.
                                972                 :              */
                                973               0 :             ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
 6495 teodor                    974               0 :             ptr->blkno = GistPageGetOpaque(page)->rightlink;
 4286 heikki.linnakangas        975               0 :             ptr->downlinkoffnum = InvalidOffsetNumber;
                                976               0 :             ptr->parent = top->parent;
                                977                 : 
                                978               0 :             fifo = lcons(ptr, fifo);
                                979                 :         }
                                980                 : 
 6495 teodor                    981               0 :         maxoff = PageGetMaxOffsetNumber(page);
                                982                 : 
 6408 bruce                     983               0 :         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
                                984                 :         {
 6495 teodor                    985               0 :             iid = PageGetItemId(page, i);
                                986               0 :             idxtuple = (IndexTuple) PageGetItem(page, iid);
                                987               0 :             blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
 6408 bruce                     988               0 :             if (blkno == child)
                                989                 :             {
                                990                 :                 /* Found it! */
 6218 tgl                       991               0 :                 UnlockReleaseBuffer(buffer);
 4286 heikki.linnakangas        992               0 :                 *downlinkoffnum = i;
 6495 teodor                    993               0 :                 return top;
                                994                 :             }
                                995                 :             else
                                996                 :             {
                                997                 :                 /* Append this child to the list of pages to visit later */
 6408 bruce                     998               0 :                 ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
 6495 teodor                    999               0 :                 ptr->blkno = blkno;
 4286 heikki.linnakangas       1000               0 :                 ptr->downlinkoffnum = i;
 6495 teodor                   1001               0 :                 ptr->parent = top;
                               1002                 : 
 4286 heikki.linnakangas       1003               0 :                 fifo = lappend(fifo, ptr);
                               1004                 :             }
                               1005                 :         }
                               1006                 : 
 6218 tgl                      1007               0 :         UnlockReleaseBuffer(buffer);
                               1008                 :     }
                               1009                 : 
 4286 heikki.linnakangas       1010               0 :     elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
                               1011                 :          RelationGetRelationName(r), child);
                               1012                 :     return NULL;                /* keep compiler quiet */
                               1013                 : }
                               1014                 : 
                               1015                 : /*
                               1016                 :  * Updates the stack so that child->parent is the correct parent of the
                               1017                 :  * child. child->parent must be exclusively locked on entry, and will
                               1018                 :  * remain so at exit, but it might not be the same page anymore.
                               1019                 :  */
                               1020                 : static void
 6408 bruce                    1021 CBC       11746 : gistFindCorrectParent(Relation r, GISTInsertStack *child)
                               1022                 : {
                               1023           11746 :     GISTInsertStack *parent = child->parent;
                               1024                 : 
 6363 tgl                      1025           11746 :     gistcheckpage(r, parent->buffer);
 2545 kgrittn                  1026           11746 :     parent->page = (Page) BufferGetPage(parent->buffer);
                               1027                 : 
                               1028                 :     /* here we don't need to distinguish between split and page update */
 3754 alvherre                 1029           23492 :     if (child->downlinkoffnum == InvalidOffsetNumber ||
                               1030           11746 :         parent->lsn != PageGetLSN(parent->page))
                               1031                 :     {
                               1032                 :         /* parent is changed, look child in right links until found */
                               1033                 :         OffsetNumber i,
                               1034                 :                     maxoff;
                               1035                 :         ItemId      iid;
                               1036                 :         IndexTuple  idxtuple;
                               1037                 :         GISTInsertStack *ptr;
                               1038                 : 
                               1039                 :         while (true)
                               1040                 :         {
 6495 teodor                   1041            1587 :             maxoff = PageGetMaxOffsetNumber(parent->page);
 6408 bruce                    1042           74464 :             for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
                               1043                 :             {
 6495 teodor                   1044           74464 :                 iid = PageGetItemId(parent->page, i);
                               1045           74464 :                 idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
 6408 bruce                    1046           74464 :                 if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
                               1047                 :                 {
                               1048                 :                     /* yes!!, found */
 4286 heikki.linnakangas       1049            1587 :                     child->downlinkoffnum = i;
 6495 teodor                   1050            1587 :                     return;
                               1051                 :                 }
                               1052                 :             }
                               1053                 : 
 6408 bruce                    1054 UBC           0 :             parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
 6218 tgl                      1055               0 :             UnlockReleaseBuffer(parent->buffer);
 6408 bruce                    1056               0 :             if (parent->blkno == InvalidBlockNumber)
                               1057                 :             {
                               1058                 :                 /*
                               1059                 :                  * End of chain and still didn't find parent. It's a very-very
                               1060                 :                  * rare situation when root splitted.
                               1061                 :                  */
 6495 teodor                   1062               0 :                 break;
                               1063                 :             }
 6408 bruce                    1064               0 :             parent->buffer = ReadBuffer(r, parent->blkno);
                               1065               0 :             LockBuffer(parent->buffer, GIST_EXCLUSIVE);
 6363 tgl                      1066               0 :             gistcheckpage(r, parent->buffer);
 2545 kgrittn                  1067               0 :             parent->page = (Page) BufferGetPage(parent->buffer);
                               1068                 :         }
                               1069                 : 
                               1070                 :         /*
                               1071                 :          * awful!!, we need search tree to find parent ... , but before we
                               1072                 :          * should release all old parent
                               1073                 :          */
                               1074                 : 
 6408 bruce                    1075               0 :         ptr = child->parent->parent;  /* child->parent already released
                               1076                 :                                          * above */
                               1077               0 :         while (ptr)
                               1078                 :         {
                               1079               0 :             ReleaseBuffer(ptr->buffer);
 6495 teodor                   1080               0 :             ptr = ptr->parent;
                               1081                 :         }
                               1082                 : 
                               1083                 :         /* ok, find new path */
 4286 heikki.linnakangas       1084               0 :         ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
                               1085                 : 
                               1086                 :         /* read all buffers as expected by caller */
                               1087                 :         /* note we don't lock them or gistcheckpage them here! */
 6408 bruce                    1088               0 :         while (ptr)
                               1089                 :         {
                               1090               0 :             ptr->buffer = ReadBuffer(r, ptr->blkno);
 2545 kgrittn                  1091               0 :             ptr->page = (Page) BufferGetPage(ptr->buffer);
 6495 teodor                   1092               0 :             ptr = ptr->parent;
                               1093                 :         }
                               1094                 : 
                               1095                 :         /* install new chain of parents to stack */
                               1096               0 :         child->parent = parent;
                               1097                 : 
                               1098                 :         /* make recursive call to normal processing */
 4490 heikki.linnakangas       1099               0 :         LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
 6408 bruce                    1100               0 :         gistFindCorrectParent(r, child);
                               1101                 :     }
                               1102                 : }
                               1103                 : 
                               1104                 : /*
                               1105                 :  * Form a downlink pointer for the page in 'buf'.
                               1106                 :  */
                               1107                 : static IndexTuple
 4490 heikki.linnakangas       1108               0 : gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
                               1109                 :                  GISTInsertStack *stack)
                               1110                 : {
 2545 kgrittn                  1111               0 :     Page        page = BufferGetPage(buf);
                               1112                 :     OffsetNumber maxoff;
                               1113                 :     OffsetNumber offset;
 4382 bruce                    1114               0 :     IndexTuple  downlink = NULL;
                               1115                 : 
 4490 heikki.linnakangas       1116               0 :     maxoff = PageGetMaxOffsetNumber(page);
                               1117               0 :     for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
                               1118                 :     {
                               1119                 :         IndexTuple  ituple = (IndexTuple)
 4382 bruce                    1120               0 :         PageGetItem(page, PageGetItemId(page, offset));
                               1121                 : 
 4490 heikki.linnakangas       1122               0 :         if (downlink == NULL)
                               1123               0 :             downlink = CopyIndexTuple(ituple);
                               1124                 :         else
                               1125                 :         {
                               1126                 :             IndexTuple  newdownlink;
                               1127                 : 
                               1128               0 :             newdownlink = gistgetadjusted(rel, downlink, ituple,
                               1129                 :                                           giststate);
                               1130               0 :             if (newdownlink)
                               1131               0 :                 downlink = newdownlink;
                               1132                 :         }
                               1133                 :     }
                               1134                 : 
                               1135                 :     /*
                               1136                 :      * If the page is completely empty, we can't form a meaningful downlink
                               1137                 :      * for it. But we have to insert a downlink for the page. Any key will do,
                               1138                 :      * as long as its consistent with the downlink of parent page, so that we
                               1139                 :      * can legally insert it to the parent. A minimal one that matches as few
                               1140                 :      * scans as possible would be best, to keep scans from doing useless work,
                               1141                 :      * but we don't know how to construct that. So we just use the downlink of
                               1142                 :      * the original page that was split - that's as far from optimal as it can
                               1143                 :      * get but will do..
                               1144                 :      */
                               1145               0 :     if (!downlink)
                               1146                 :     {
                               1147                 :         ItemId      iid;
                               1148                 : 
                               1149               0 :         LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
                               1150               0 :         gistFindCorrectParent(rel, stack);
 4286                          1151               0 :         iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
 4490                          1152               0 :         downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
                               1153               0 :         downlink = CopyIndexTuple(downlink);
                               1154               0 :         LockBuffer(stack->parent->buffer, GIST_UNLOCK);
                               1155                 :     }
                               1156                 : 
                               1157               0 :     ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
                               1158               0 :     GistTupleSetValid(downlink);
                               1159                 : 
                               1160               0 :     return downlink;
                               1161                 : }
                               1162                 : 
                               1163                 : 
                               1164                 : /*
                               1165                 :  * Complete the incomplete split of state->stack->page.
                               1166                 :  */
                               1167                 : static void
                               1168               0 : gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
                               1169                 : {
                               1170               0 :     GISTInsertStack *stack = state->stack;
                               1171                 :     Buffer      buf;
                               1172                 :     Page        page;
                               1173               0 :     List       *splitinfo = NIL;
                               1174                 : 
  856 peter                    1175               0 :     ereport(LOG,
                               1176                 :             (errmsg("fixing incomplete split in index \"%s\", block %u",
                               1177                 :                     RelationGetRelationName(state->r), stack->blkno)));
                               1178                 : 
 4490 heikki.linnakangas       1179               0 :     Assert(GistFollowRight(stack->page));
 4286                          1180               0 :     Assert(OffsetNumberIsValid(stack->downlinkoffnum));
                               1181                 : 
 4490                          1182               0 :     buf = stack->buffer;
                               1183                 : 
                               1184                 :     /*
                               1185                 :      * Read the chain of split pages, following the rightlinks. Construct a
                               1186                 :      * downlink tuple for each page.
                               1187                 :      */
                               1188                 :     for (;;)
                               1189               0 :     {
                               1190               0 :         GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
                               1191                 :         IndexTuple  downlink;
                               1192                 : 
 2545 kgrittn                  1193               0 :         page = BufferGetPage(buf);
                               1194                 : 
                               1195                 :         /* Form the new downlink tuples to insert to parent */
 4490 heikki.linnakangas       1196               0 :         downlink = gistformdownlink(state->r, buf, giststate, stack);
                               1197                 : 
                               1198               0 :         si->buf = buf;
                               1199               0 :         si->downlink = downlink;
                               1200                 : 
                               1201               0 :         splitinfo = lappend(splitinfo, si);
                               1202                 : 
                               1203               0 :         if (GistFollowRight(page))
                               1204                 :         {
                               1205                 :             /* lock next page */
                               1206               0 :             buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
                               1207               0 :             LockBuffer(buf, GIST_EXCLUSIVE);
                               1208                 :         }
                               1209                 :         else
                               1210               0 :             break;
                               1211                 :     }
                               1212                 : 
                               1213                 :     /* Insert the downlinks */
 3985                          1214               0 :     gistfinishsplit(state, stack, giststate, splitinfo, false);
 4490                          1215               0 : }
                               1216                 : 
                               1217                 : /*
                               1218                 :  * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
                               1219                 :  * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
                               1220                 :  * 'stack' represents the path from the root to the page being updated.
                               1221                 :  *
                               1222                 :  * The caller must hold an exclusive lock on stack->buffer.  The lock is still
                               1223                 :  * held on return, but the page might not contain the inserted tuple if the
                               1224                 :  * page was split. The function returns true if the page was split, false
                               1225                 :  * otherwise.
                               1226                 :  */
                               1227                 : static bool
 3985 heikki.linnakangas       1228 CBC      782751 : gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
                               1229                 :                 GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
                               1230                 : {
                               1231          782751 :     return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
                               1232                 :                             InvalidBuffer, InvalidBuffer, false, false);
                               1233                 : }
                               1234                 : 
                               1235                 : /* ----------------
                               1236                 :  * An extended workhorse version of gistinserttuple(). This version allows
                               1237                 :  * inserting multiple tuples, or replacing a single tuple with multiple tuples.
                               1238                 :  * This is used to recursively update the downlinks in the parent when a page
                               1239                 :  * is split.
                               1240                 :  *
                               1241                 :  * If leftchild and rightchild are valid, we're inserting/replacing the
                               1242                 :  * downlink for rightchild, and leftchild is its left sibling. We clear the
                               1243                 :  * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
                               1244                 :  * insertion of the downlink.
                               1245                 :  *
                               1246                 :  * To avoid holding locks for longer than necessary, when recursing up the
                               1247                 :  * tree to update the parents, the locking is a bit peculiar here. On entry,
                               1248                 :  * the caller must hold an exclusive lock on stack->buffer, as well as
                               1249                 :  * leftchild and rightchild if given. On return:
                               1250                 :  *
                               1251                 :  *  - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
                               1252                 :  *    always kept pinned, however.
                               1253                 :  *  - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
                               1254                 :  *    is kept pinned.
                               1255                 :  *  - Lock and pin on 'rightchild' are always released.
                               1256                 :  *
                               1257                 :  * Returns 'true' if the page had to be split. Note that if the page was
                               1258                 :  * split, the inserted/updated tuples might've been inserted to a right
                               1259                 :  * sibling of stack->buffer instead of stack->buffer itself.
                               1260                 :  */
                               1261                 : static bool
 4490                          1262          794497 : gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
                               1263                 :                  GISTSTATE *giststate,
                               1264                 :                  IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
                               1265                 :                  Buffer leftchild, Buffer rightchild,
                               1266                 :                  bool unlockbuf, bool unlockleftchild)
                               1267                 : {
                               1268                 :     List       *splitinfo;
                               1269                 :     bool        is_split;
                               1270                 : 
                               1271                 :     /*
                               1272                 :      * Check for any rw conflicts (in serializable isolation level) just
                               1273                 :      * before we intend to modify the page
                               1274                 :      */
 1167 tmunro                   1275          794497 :     CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));
                               1276                 : 
                               1277                 :     /* Insert the tuple(s) to the page, splitting the page if necessary */
 4231 heikki.linnakangas       1278          794491 :     is_split = gistplacetopage(state->r, state->freespace, giststate,
                               1279                 :                                stack->buffer,
                               1280                 :                                tuples, ntup,
                               1281                 :                                oldoffnum, NULL,
                               1282                 :                                leftchild,
                               1283                 :                                &splitinfo,
                               1284                 :                                true,
                               1285                 :                                state->heapRel,
 1467                          1286          794491 :                                state->is_build);
                               1287                 : 
                               1288                 :     /*
                               1289                 :      * Before recursing up in case the page was split, release locks on the
                               1290                 :      * child pages. We don't need to keep them locked when updating the
                               1291                 :      * parent.
                               1292                 :      */
 3985                          1293          794491 :     if (BufferIsValid(rightchild))
                               1294           11746 :         UnlockReleaseBuffer(rightchild);
                               1295          794491 :     if (BufferIsValid(leftchild) && unlockleftchild)
                               1296            2317 :         LockBuffer(leftchild, GIST_UNLOCK);
                               1297                 : 
                               1298                 :     /*
                               1299                 :      * If we had to split, insert/update the downlinks in the parent. If the
                               1300                 :      * caller requested us to release the lock on stack->buffer, tell
                               1301                 :      * gistfinishsplit() to do that as soon as it's safe to do so. If we
                               1302                 :      * didn't have to split, release it ourselves.
                               1303                 :      */
 4490                          1304          794491 :     if (splitinfo)
 3985                          1305           11722 :         gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
                               1306          782769 :     else if (unlockbuf)
                               1307            9405 :         LockBuffer(stack->buffer, GIST_UNLOCK);
                               1308                 : 
 4490                          1309          794491 :     return is_split;
                               1310                 : }
                               1311                 : 
                               1312                 : /*
                               1313                 :  * Finish an incomplete split by inserting/updating the downlinks in parent
                               1314                 :  * page. 'splitinfo' contains all the child pages involved in the split,
                               1315                 :  * from left-to-right.
                               1316                 :  *
                               1317                 :  * On entry, the caller must hold a lock on stack->buffer and all the child
                               1318                 :  * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
                               1319                 :  * released on return. The child pages are always unlocked and unpinned.
                               1320                 :  */
                               1321                 : static void
                               1322           11722 : gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
                               1323                 :                 GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
                               1324                 : {
                               1325                 :     GISTPageSplitInfo *right;
                               1326                 :     GISTPageSplitInfo *left;
                               1327                 :     IndexTuple  tuples[2];
                               1328                 : 
                               1329                 :     /* A split always contains at least two halves */
                               1330           11722 :     Assert(list_length(splitinfo) >= 2);
                               1331                 : 
                               1332                 :     /*
                               1333                 :      * We need to insert downlinks for each new page, and update the downlink
                               1334                 :      * for the original (leftmost) page in the split. Begin at the rightmost
                               1335                 :      * page, inserting one downlink at a time until there's only two pages
                               1336                 :      * left. Finally insert the downlink for the last new page and update the
                               1337                 :      * downlink for the original page as one operation.
                               1338                 :      */
                               1339           11722 :     LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
                               1340                 : 
                               1341                 :     /*
                               1342                 :      * Insert downlinks for the siblings from right to left, until there are
                               1343                 :      * only two siblings left.
                               1344                 :      */
 1362 tgl                      1345           11746 :     for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
                               1346                 :     {
                               1347              24 :         right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
                               1348              24 :         left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
                               1349                 : 
 1210 heikki.linnakangas       1350              24 :         gistFindCorrectParent(state->r, stack);
 4490                          1351              24 :         if (gistinserttuples(state, stack->parent, giststate,
                               1352                 :                              &right->downlink, 1,
                               1353                 :                              InvalidOffsetNumber,
                               1354                 :                              left->buf, right->buf, false, false))
                               1355                 :         {
                               1356                 :             /*
                               1357                 :              * If the parent page was split, the existing downlink might have
                               1358                 :              * moved.
                               1359                 :              */
 1213 heikki.linnakangas       1360 UBC           0 :             stack->downlinkoffnum = InvalidOffsetNumber;
                               1361                 :         }
                               1362                 :         /* gistinserttuples() released the lock on right->buf. */
                               1363                 :     }
                               1364                 : 
 1362 tgl                      1365 CBC       11722 :     right = (GISTPageSplitInfo *) lsecond(splitinfo);
                               1366           11722 :     left = (GISTPageSplitInfo *) linitial(splitinfo);
                               1367                 : 
                               1368                 :     /*
                               1369                 :      * Finally insert downlink for the remaining right page and update the
                               1370                 :      * downlink for the original page to not contain the tuples that were
                               1371                 :      * moved to the new pages.
                               1372                 :      */
 4490 heikki.linnakangas       1373           11722 :     tuples[0] = left->downlink;
                               1374           11722 :     tuples[1] = right->downlink;
 1210                          1375           11722 :     gistFindCorrectParent(state->r, stack);
                               1376           11722 :     if (gistinserttuples(state, stack->parent, giststate,
                               1377                 :                          tuples, 2,
                               1378           11722 :                          stack->downlinkoffnum,
                               1379                 :                          left->buf, right->buf,
                               1380                 :                          true,  /* Unlock parent */
                               1381                 :                          unlockbuf  /* Unlock stack->buffer if caller wants
                               1382                 :                                      * that */
                               1383                 :                          ))
                               1384                 :     {
                               1385                 :         /*
                               1386                 :          * If the parent page was split, the downlink might have moved.
                               1387                 :          */
                               1388            2364 :         stack->downlinkoffnum = InvalidOffsetNumber;
                               1389                 :     }
                               1390                 : 
 4490                          1391           11722 :     Assert(left->buf == stack->buffer);
                               1392                 : 
                               1393                 :     /*
                               1394                 :      * If we split the page because we had to adjust the downlink on an
                               1395                 :      * internal page, while descending the tree for inserting a new tuple,
                               1396                 :      * then this might no longer be the correct page for the new tuple. The
                               1397                 :      * downlink to this page might not cover the new tuple anymore, it might
                               1398                 :      * need to go to the newly-created right sibling instead. Tell the caller
                               1399                 :      * to walk back up the stack, to re-check at the parent which page to
                               1400                 :      * insert to.
                               1401                 :      *
                               1402                 :      * Normally, the LSN-NSN interlock during the tree descend would also
                               1403                 :      * detect that a concurrent split happened (by ourselves), and cause us to
                               1404                 :      * retry at the parent. But that mechanism doesn't work during index
                               1405                 :      * build, because we don't do WAL-logging, and don't update LSNs, during
                               1406                 :      * index build.
                               1407                 :      */
 1426                          1408           11722 :     stack->retry_from_parent = true;
 6502 teodor                   1409           11722 : }
                               1410                 : 
                               1411                 : /*
                               1412                 :  * gistSplit -- split a page in the tree and fill struct
                               1413                 :  * used for XLOG and real writes buffers. Function is recursive, ie
                               1414                 :  * it will split page until keys will fit in every page.
                               1415                 :  */
                               1416                 : SplitedPageLayout *
 9722 scrappy                  1417           12884 : gistSplit(Relation r,
                               1418                 :           Page page,
                               1419                 :           IndexTuple *itup,     /* contains compressed entry */
                               1420                 :           int len,
                               1421                 :           GISTSTATE *giststate)
                               1422                 : {
                               1423                 :     IndexTuple *lvectup,
                               1424                 :                *rvectup;
                               1425                 :     GistSplitVector v;
                               1426                 :     int         i;
 6031 bruce                    1427           12884 :     SplitedPageLayout *res = NULL;
                               1428                 : 
                               1429                 :     /* this should never recurse very deeply, but better safe than sorry */
 3110 heikki.linnakangas       1430           12884 :     check_stack_depth();
                               1431                 : 
                               1432                 :     /* there's no point in splitting an empty page */
                               1433           12884 :     Assert(len > 0);
                               1434                 : 
                               1435                 :     /*
                               1436                 :      * If a single tuple doesn't fit on a page, no amount of splitting will
                               1437                 :      * help.
                               1438                 :      */
                               1439           12884 :     if (len == 1)
 3110 heikki.linnakangas       1440 UBC           0 :         ereport(ERROR,
                               1441                 :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                               1442                 :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
                               1443                 :                         IndexTupleSize(itup[0]), GiSTPageSize,
                               1444                 :                         RelationGetRelationName(r))));
                               1445                 : 
 1491 akorotkov                1446 CBC       12884 :     memset(v.spl_lisnull, true,
                               1447           12884 :            sizeof(bool) * giststate->nonLeafTupdesc->natts);
                               1448           12884 :     memset(v.spl_risnull, true,
                               1449           12884 :            sizeof(bool) * giststate->nonLeafTupdesc->natts);
 3710 tgl                      1450           12884 :     gistSplitByKey(r, page, itup, len, giststate, &v, 0);
                               1451                 : 
                               1452                 :     /* form left and right vector */
 6178 teodor                   1453           12884 :     lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
                               1454           12884 :     rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
                               1455                 : 
 6129                          1456          540908 :     for (i = 0; i < v.splitVector.spl_nleft; i++)
                               1457          528024 :         lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
                               1458                 : 
                               1459          605738 :     for (i = 0; i < v.splitVector.spl_nright; i++)
                               1460          592854 :         rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
                               1461                 : 
                               1462                 :     /* finalize splitting (may need another split) */
                               1463           12884 :     if (!gistfitpage(rvectup, v.splitVector.spl_nright))
                               1464                 :     {
                               1465             292 :         res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
                               1466                 :     }
                               1467                 :     else
                               1468                 :     {
 6178                          1469           12592 :         ROTATEDIST(res);
 6129                          1470           12592 :         res->block.num = v.splitVector.spl_nright;
 6031 bruce                    1471           12592 :         res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
 4490 heikki.linnakangas       1472           12592 :         res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
                               1473                 :     }
                               1474                 : 
 6129 teodor                   1475           12884 :     if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
                               1476                 :     {
                               1477                 :         SplitedPageLayout *resptr,
                               1478                 :                    *subres;
                               1479                 : 
                               1480             157 :         resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
                               1481                 : 
                               1482                 :         /* install on list's tail */
 6031 bruce                    1483             395 :         while (resptr->next)
 6178 teodor                   1484             238 :             resptr = resptr->next;
                               1485                 : 
                               1486             157 :         resptr->next = res;
                               1487             157 :         res = subres;
                               1488                 :     }
                               1489                 :     else
                               1490                 :     {
                               1491           12727 :         ROTATEDIST(res);
 6129                          1492           12727 :         res->block.num = v.splitVector.spl_nleft;
 6031 bruce                    1493           12727 :         res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
 4490 heikki.linnakangas       1494           12727 :         res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
                               1495                 :     }
                               1496                 : 
 6178 teodor                   1497           12884 :     return res;
                               1498                 : }
                               1499                 : 
                               1500                 : /*
                               1501                 :  * Create a GISTSTATE and fill it with information about the index
                               1502                 :  */
                               1503                 : GISTSTATE *
 4209 tgl                      1504            1788 : initGISTstate(Relation index)
                               1505                 : {
                               1506                 :     GISTSTATE  *giststate;
                               1507                 :     MemoryContext scanCxt;
                               1508                 :     MemoryContext oldCxt;
                               1509                 :     int         i;
                               1510                 : 
                               1511                 :     /* safety check to protect fixed-size arrays in GISTSTATE */
 7901                          1512            1788 :     if (index->rd_att->natts > INDEX_MAX_KEYS)
 7202 tgl                      1513 UBC           0 :         elog(ERROR, "numberOfAttributes %d > %d",
                               1514                 :              index->rd_att->natts, INDEX_MAX_KEYS);
                               1515                 : 
                               1516                 :     /* Create the memory context that will hold the GISTSTATE */
 4209 tgl                      1517 CBC        1788 :     scanCxt = AllocSetContextCreate(CurrentMemoryContext,
                               1518                 :                                     "GiST scan context",
                               1519                 :                                     ALLOCSET_DEFAULT_SIZES);
                               1520            1788 :     oldCxt = MemoryContextSwitchTo(scanCxt);
                               1521                 : 
                               1522                 :     /* Create and fill in the GISTSTATE */
                               1523            1788 :     giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
                               1524                 : 
                               1525            1788 :     giststate->scanCxt = scanCxt;
 2118                          1526            1788 :     giststate->tempCxt = scanCxt;    /* caller must change this if needed */
 1491 akorotkov                1527            1788 :     giststate->leafTupdesc = index->rd_att;
                               1528                 : 
                               1529                 :     /*
                               1530                 :      * The truncated tupdesc for non-leaf index tuples, which doesn't contain
                               1531                 :      * the INCLUDE attributes.
                               1532                 :      *
                               1533                 :      * It is used to form tuples during tuple adjustment and page split.
                               1534                 :      * B-tree creates shortened tuple descriptor for every truncated tuple,
                               1535                 :      * because it is doing this less often: it does not have to form truncated
                               1536                 :      * tuples during page split.  Also, B-tree is not adjusting tuples on
                               1537                 :      * internal pages the way GiST does.
                               1538                 :      */
                               1539            1788 :     giststate->nonLeafTupdesc = CreateTupleDescCopyConstr(index->rd_att);
                               1540            1788 :     giststate->nonLeafTupdesc->natts =
                               1541            1788 :         IndexRelationGetNumberOfKeyAttributes(index);
                               1542                 : 
                               1543            3736 :     for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
                               1544                 :     {
 7855 tgl                      1545            1948 :         fmgr_info_copy(&(giststate->consistentFn[i]),
 6536 neilc                    1546            1948 :                        index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
                               1547                 :                        scanCxt);
 7855 tgl                      1548            1948 :         fmgr_info_copy(&(giststate->unionFn[i]),
 7836 bruce                    1549            1948 :                        index_getprocinfo(index, i + 1, GIST_UNION_PROC),
                               1550                 :                        scanCxt);
                               1551                 : 
                               1552                 :         /* opclasses are not required to provide a Compress method */
 2028 tgl                      1553            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
                               1554            1448 :             fmgr_info_copy(&(giststate->compressFn[i]),
                               1555            1448 :                            index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
                               1556                 :                            scanCxt);
                               1557                 :         else
                               1558             500 :             giststate->compressFn[i].fn_oid = InvalidOid;
                               1559                 : 
                               1560                 :         /* opclasses are not required to provide a Decompress method */
                               1561            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
                               1562             745 :             fmgr_info_copy(&(giststate->decompressFn[i]),
                               1563             745 :                            index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
                               1564                 :                            scanCxt);
                               1565                 :         else
                               1566            1203 :             giststate->decompressFn[i].fn_oid = InvalidOid;
                               1567                 : 
 7855                          1568            1948 :         fmgr_info_copy(&(giststate->penaltyFn[i]),
 7836 bruce                    1569            1948 :                        index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
                               1570                 :                        scanCxt);
 7855 tgl                      1571            1948 :         fmgr_info_copy(&(giststate->picksplitFn[i]),
 6536 neilc                    1572            1948 :                        index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
                               1573                 :                        scanCxt);
 7855 tgl                      1574            1948 :         fmgr_info_copy(&(giststate->equalFn[i]),
 7836 bruce                    1575            1948 :                        index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
                               1576                 :                        scanCxt);
                               1577                 : 
                               1578                 :         /* opclasses are not required to provide a Distance method */
 4510 tgl                      1579            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
                               1580             898 :             fmgr_info_copy(&(giststate->distanceFn[i]),
 2118                          1581             898 :                            index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
                               1582                 :                            scanCxt);
                               1583                 :         else
 4510                          1584            1050 :             giststate->distanceFn[i].fn_oid = InvalidOid;
                               1585                 : 
                               1586                 :         /* opclasses are not required to provide a Fetch method */
 2936 heikki.linnakangas       1587            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
                               1588             631 :             fmgr_info_copy(&(giststate->fetchFn[i]),
 2878 bruce                    1589             631 :                            index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
                               1590                 :                            scanCxt);
                               1591                 :         else
 2936 heikki.linnakangas       1592            1317 :             giststate->fetchFn[i].fn_oid = InvalidOid;
                               1593                 : 
                               1594                 :         /*
                               1595                 :          * If the index column has a specified collation, we should honor that
                               1596                 :          * while doing comparisons.  However, we may have a collatable storage
                               1597                 :          * type for a noncollatable indexed data type.  If there's no index
                               1598                 :          * collation then specify default collation in case the support
                               1599                 :          * functions need collation.  This is harmless if the support
                               1600                 :          * functions don't care about collation, so we just do it
                               1601                 :          * unconditionally.  (We could alternatively call get_typcollation,
                               1602                 :          * but that seems like expensive overkill --- there aren't going to be
                               1603                 :          * any cases where a GiST storage type has a nondefault collation.)
                               1604                 :          */
 4370 tgl                      1605            1948 :         if (OidIsValid(index->rd_indcollation[i]))
                               1606              96 :             giststate->supportCollation[i] = index->rd_indcollation[i];
                               1607                 :         else
                               1608            1852 :             giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
                               1609                 :     }
                               1610                 : 
                               1611                 :     /* No opclass information for INCLUDE attributes */
 1491 akorotkov                1612            2037 :     for (; i < index->rd_att->natts; i++)
                               1613                 :     {
                               1614             249 :         giststate->consistentFn[i].fn_oid = InvalidOid;
                               1615             249 :         giststate->unionFn[i].fn_oid = InvalidOid;
                               1616             249 :         giststate->compressFn[i].fn_oid = InvalidOid;
                               1617             249 :         giststate->decompressFn[i].fn_oid = InvalidOid;
                               1618             249 :         giststate->penaltyFn[i].fn_oid = InvalidOid;
                               1619             249 :         giststate->picksplitFn[i].fn_oid = InvalidOid;
                               1620             249 :         giststate->equalFn[i].fn_oid = InvalidOid;
                               1621             249 :         giststate->distanceFn[i].fn_oid = InvalidOid;
                               1622             249 :         giststate->fetchFn[i].fn_oid = InvalidOid;
                               1623             249 :         giststate->supportCollation[i] = InvalidOid;
                               1624                 :     }
                               1625                 : 
 4209 tgl                      1626            1788 :     MemoryContextSwitchTo(oldCxt);
                               1627                 : 
                               1628            1788 :     return giststate;
                               1629                 : }
                               1630                 : 
                               1631                 : void
 7836 bruce                    1632            1523 : freeGISTstate(GISTSTATE *giststate)
                               1633                 : {
                               1634                 :     /* It's sufficient to delete the scanCxt */
 4209 tgl                      1635            1523 :     MemoryContextDelete(giststate->scanCxt);
 9722 scrappy                  1636            1523 : }
                               1637                 : 
                               1638                 : /*
                               1639                 :  * gistprunepage() -- try to remove LP_DEAD items from the given page.
                               1640                 :  * Function assumes that buffer is exclusively locked.
                               1641                 :  */
                               1642                 : static void
 1496 heikki.linnakangas       1643 UBC           0 : gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
                               1644                 : {
                               1645                 :     OffsetNumber deletable[MaxIndexTuplesPerPage];
 2495 rhaas                    1646               0 :     int         ndeletable = 0;
                               1647                 :     OffsetNumber offnum,
                               1648                 :                 maxoff;
                               1649                 : 
 2769 teodor                   1650               0 :     Assert(GistPageIsLeaf(page));
                               1651                 : 
                               1652                 :     /*
                               1653                 :      * Scan over all items to see which ones need to be deleted according to
                               1654                 :      * LP_DEAD flags.
                               1655                 :      */
                               1656               0 :     maxoff = PageGetMaxOffsetNumber(page);
                               1657               0 :     for (offnum = FirstOffsetNumber;
                               1658               0 :          offnum <= maxoff;
                               1659               0 :          offnum = OffsetNumberNext(offnum))
                               1660                 :     {
                               1661               0 :         ItemId      itemId = PageGetItemId(page, offnum);
                               1662                 : 
                               1663               0 :         if (ItemIdIsDead(itemId))
                               1664               0 :             deletable[ndeletable++] = offnum;
                               1665                 :     }
                               1666                 : 
                               1667               0 :     if (ndeletable > 0)
                               1668                 :     {
  143 pg                       1669 UNC           0 :         TransactionId snapshotConflictHorizon = InvalidTransactionId;
                               1670                 : 
  803 pg                       1671 UBC           0 :         if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
                               1672                 :             snapshotConflictHorizon =
                               1673               0 :                 index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
                               1674                 :                                                      deletable, ndeletable);
                               1675                 : 
 2769 teodor                   1676               0 :         START_CRIT_SECTION();
                               1677                 : 
                               1678               0 :         PageIndexMultiDelete(page, deletable, ndeletable);
                               1679                 : 
                               1680                 :         /*
                               1681                 :          * Mark the page as not containing any LP_DEAD items.  This is not
                               1682                 :          * certainly true (there might be some that have recently been marked,
                               1683                 :          * but weren't included in our target-item list), but it will almost
                               1684                 :          * always be true and it doesn't seem worth an additional page scan to
                               1685                 :          * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
                               1686                 :          */
                               1687               0 :         GistClearPageHasGarbage(page);
                               1688                 : 
                               1689               0 :         MarkBufferDirty(buffer);
                               1690                 : 
                               1691                 :         /* XLOG stuff */
                               1692               0 :         if (RelationNeedsWAL(rel))
                               1693               0 :         {
                               1694                 :             XLogRecPtr  recptr;
                               1695                 : 
 1570 akorotkov                1696               0 :             recptr = gistXLogDelete(buffer,
                               1697                 :                                     deletable, ndeletable,
                               1698                 :                                     snapshotConflictHorizon,
                               1699                 :                                     heapRel);
                               1700                 : 
 2769 teodor                   1701 UIC           0 :             PageSetLSN(page, recptr);
 2769 teodor                   1702 EUB             :         }
                               1703                 :         else
 2769 teodor                   1704 UIC           0 :             PageSetLSN(page, gistGetFakeLSN(rel));
 2769 teodor                   1705 EUB             : 
 2769 teodor                   1706 UIC           0 :         END_CRIT_SECTION();
 2769 teodor                   1707 EUB             :     }
                               1708                 : 
                               1709                 :     /*
                               1710                 :      * Note: if we didn't find any LP_DEAD items, then the page's
                               1711                 :      * F_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
                               1712                 :      * separate write to clear it, however.  We will clear it when we split
                               1713                 :      * the page.
                               1714                 :      */
 2769 teodor                   1715 UIC           0 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a