LCOV - differential code coverage report
Current view: top level - src/backend/access/gist - gist.c (source / functions) Coverage Total Hit UNC UIC UBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 71.3 % 578 412 1 4 161 20 4 388 4 21 1 3
Current Date: 2023-04-08 15:15:32 Functions: 76.5 % 17 13 1 3 4 1 8 4
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * gist.c
       4                 :  *    interface routines for the postgres GiST index access method.
       5                 :  *
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/access/gist/gist.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/gist_private.h"
      18                 : #include "access/gistscan.h"
      19                 : #include "access/xloginsert.h"
      20                 : #include "catalog/pg_collation.h"
      21                 : #include "commands/vacuum.h"
      22                 : #include "miscadmin.h"
      23                 : #include "nodes/execnodes.h"
      24                 : #include "storage/lmgr.h"
      25                 : #include "storage/predicate.h"
      26                 : #include "utils/builtins.h"
      27                 : #include "utils/index_selfuncs.h"
      28                 : #include "utils/memutils.h"
      29                 : #include "utils/rel.h"
      30                 : 
      31                 : /* non-export function prototypes */
      32                 : static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
      33                 : static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
      34                 :                             GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
      35                 : static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
      36                 :                              GISTSTATE *giststate,
      37                 :                              IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
      38                 :                              Buffer leftchild, Buffer rightchild,
      39                 :                              bool unlockbuf, bool unlockleftchild);
      40                 : static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
      41                 :                             GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
      42                 : static void gistprunepage(Relation rel, Page page, Buffer buffer,
      43                 :                           Relation heapRel);
      44                 : 
      45                 : 
      46                 : #define ROTATEDIST(d) do { \
      47                 :     SplitedPageLayout *tmp=(SplitedPageLayout*)palloc0(sizeof(SplitedPageLayout)); \
      48                 :     tmp->block.blkno = InvalidBlockNumber;   \
      49                 :     tmp->buffer = InvalidBuffer; \
      50                 :     tmp->next = (d); \
      51                 :     (d)=tmp; \
      52                 : } while(0)
      53                 : 
      54                 : 
      55                 : /*
      56                 :  * GiST handler function: return IndexAmRoutine with access method parameters
      57                 :  * and callbacks.
      58                 :  */
      59                 : Datum
      60 CBC        2179 : gisthandler(PG_FUNCTION_ARGS)
      61                 : {
      62            2179 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
      63                 : 
      64            2179 :     amroutine->amstrategies = 0;
      65            2179 :     amroutine->amsupport = GISTNProcs;
      66            2179 :     amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
      67            2179 :     amroutine->amcanorder = false;
      68            2179 :     amroutine->amcanorderbyop = true;
      69            2179 :     amroutine->amcanbackward = false;
      70            2179 :     amroutine->amcanunique = false;
      71            2179 :     amroutine->amcanmulticol = true;
      72            2179 :     amroutine->amoptionalkey = true;
      73            2179 :     amroutine->amsearcharray = false;
      74            2179 :     amroutine->amsearchnulls = true;
      75            2179 :     amroutine->amstorage = true;
      76            2179 :     amroutine->amclusterable = true;
      77            2179 :     amroutine->ampredlocks = true;
      78            2179 :     amroutine->amcanparallel = false;
      79            2179 :     amroutine->amcaninclude = true;
      80            2179 :     amroutine->amusemaintenanceworkmem = false;
      81 GNC        2179 :     amroutine->amsummarizing = false;
      82 CBC        2179 :     amroutine->amparallelvacuumoptions =
      83 ECB             :         VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
      84 GIC        2179 :     amroutine->amkeytype = InvalidOid;
      85 ECB             : 
      86 GIC        2179 :     amroutine->ambuild = gistbuild;
      87 CBC        2179 :     amroutine->ambuildempty = gistbuildempty;
      88            2179 :     amroutine->aminsert = gistinsert;
      89            2179 :     amroutine->ambulkdelete = gistbulkdelete;
      90            2179 :     amroutine->amvacuumcleanup = gistvacuumcleanup;
      91            2179 :     amroutine->amcanreturn = gistcanreturn;
      92            2179 :     amroutine->amcostestimate = gistcostestimate;
      93            2179 :     amroutine->amoptions = gistoptions;
      94            2179 :     amroutine->amproperty = gistproperty;
      95            2179 :     amroutine->ambuildphasename = NULL;
      96            2179 :     amroutine->amvalidate = gistvalidate;
      97            2179 :     amroutine->amadjustmembers = gistadjustmembers;
      98            2179 :     amroutine->ambeginscan = gistbeginscan;
      99            2179 :     amroutine->amrescan = gistrescan;
     100            2179 :     amroutine->amgettuple = gistgettuple;
     101            2179 :     amroutine->amgetbitmap = gistgetbitmap;
     102            2179 :     amroutine->amendscan = gistendscan;
     103            2179 :     amroutine->ammarkpos = NULL;
     104            2179 :     amroutine->amrestrpos = NULL;
     105            2179 :     amroutine->amestimateparallelscan = NULL;
     106            2179 :     amroutine->aminitparallelscan = NULL;
     107            2179 :     amroutine->amparallelrescan = NULL;
     108 ECB             : 
     109 GIC        2179 :     PG_RETURN_POINTER(amroutine);
     110 ECB             : }
     111                 : 
     112                 : /*
     113                 :  * Create and return a temporary memory context for use by GiST. We
     114                 :  * _always_ invoke user-provided methods in a temporary memory
     115                 :  * context, so that memory leaks in those functions cannot cause
     116                 :  * problems. Also, we use some additional temporary contexts in the
     117                 :  * GiST code itself, to avoid the need to do some awkward manual
     118                 :  * memory management.
     119                 :  */
     120                 : MemoryContext
     121 GIC        1929 : createTempGistContext(void)
     122 ECB             : {
     123 GIC        1929 :     return AllocSetContextCreate(CurrentMemoryContext,
     124 ECB             :                                  "GiST temporary context",
     125                 :                                  ALLOCSET_DEFAULT_SIZES);
     126                 : }
     127                 : 
     128                 : /*
     129                 :  *  gistbuildempty() -- build an empty gist index in the initialization fork
     130                 :  */
     131                 : void
     132 GIC           3 : gistbuildempty(Relation index)
     133 ECB             : {
     134                 :     Buffer      buffer;
     135                 : 
     136                 :     /* Initialize the root page */
     137 GNC           3 :     buffer = ExtendBufferedRel(EB_REL(index), INIT_FORKNUM, NULL,
     138                 :                                EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);
     139                 : 
     140                 :     /* Initialize and xlog buffer */
     141 GIC           3 :     START_CRIT_SECTION();
     142 CBC           3 :     GISTInitBuffer(buffer, F_LEAF);
     143               3 :     MarkBufferDirty(buffer);
     144               3 :     log_newpage_buffer(buffer, true);
     145               3 :     END_CRIT_SECTION();
     146 ECB             : 
     147                 :     /* Unlock and release the buffer */
     148 GIC           3 :     UnlockReleaseBuffer(buffer);
     149 CBC           3 : }
     150 ECB             : 
     151                 : /*
     152                 :  *  gistinsert -- wrapper for GiST tuple insertion.
     153                 :  *
     154                 :  *    This is the public interface routine for tuple insertion in GiSTs.
     155                 :  *    It doesn't do any work; just locks the relation and passes the buck.
     156                 :  */
     157                 : bool
     158 GIC      151307 : gistinsert(Relation r, Datum *values, bool *isnull,
     159 ECB             :            ItemPointer ht_ctid, Relation heapRel,
     160                 :            IndexUniqueCheck checkUnique,
     161                 :            bool indexUnchanged,
     162                 :            IndexInfo *indexInfo)
     163                 : {
     164 GIC      151307 :     GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
     165 ECB             :     IndexTuple  itup;
     166                 :     MemoryContext oldCxt;
     167                 : 
     168                 :     /* Initialize GISTSTATE cache if first call in this statement */
     169 GIC      151307 :     if (giststate == NULL)
     170 ECB             :     {
     171 GIC         237 :         oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
     172 CBC         237 :         giststate = initGISTstate(r);
     173             237 :         giststate->tempCxt = createTempGistContext();
     174             237 :         indexInfo->ii_AmCache = (void *) giststate;
     175             237 :         MemoryContextSwitchTo(oldCxt);
     176 ECB             :     }
     177                 : 
     178 GIC      151307 :     oldCxt = MemoryContextSwitchTo(giststate->tempCxt);
     179 ECB             : 
     180 GIC      151307 :     itup = gistFormTuple(giststate, r,
     181 ECB             :                          values, isnull, true /* size is currently bogus */ );
     182 GIC      151307 :     itup->t_tid = *ht_ctid;
     183 ECB             : 
     184 GIC      151307 :     gistdoinsert(r, itup, 0, giststate, heapRel, false);
     185 ECB             : 
     186                 :     /* cleanup */
     187 GIC      151301 :     MemoryContextSwitchTo(oldCxt);
     188 CBC      151301 :     MemoryContextReset(giststate->tempCxt);
     189 ECB             : 
     190 GIC      151301 :     return false;
     191 ECB             : }
     192                 : 
     193                 : 
     194                 : /*
     195                 :  * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
     196                 :  * at that offset is atomically removed along with inserting the new tuples.
     197                 :  * This is used to replace a tuple with a new one.
     198                 :  *
     199                 :  * If 'leftchildbuf' is valid, we're inserting the downlink for the page
     200                 :  * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
     201                 :  * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
     202                 :  *
     203                 :  * If 'markfollowright' is true and the page is split, the left child is
     204                 :  * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
     205                 :  * index build, however, there is no concurrent access and the page splitting
     206                 :  * is done in a slightly simpler fashion, and false is passed.
     207                 :  *
     208                 :  * If there is not enough room on the page, it is split. All the split
     209                 :  * pages are kept pinned and locked and returned in *splitinfo, the caller
     210                 :  * is responsible for inserting the downlinks for them. However, if
     211                 :  * 'buffer' is the root page and it needs to be split, gistplacetopage()
     212                 :  * performs the split as one atomic operation, and *splitinfo is set to NIL.
     213                 :  * In that case, we continue to hold the root page locked, and the child
     214                 :  * pages are released; note that new tuple(s) are *not* on the root page
     215                 :  * but in one of the new child pages.
     216                 :  *
     217                 :  * If 'newblkno' is not NULL, returns the block number of page the first
     218                 :  * new/updated tuple was inserted to. Usually it's the given page, but could
     219                 :  * be its right sibling if the page was split.
     220                 :  *
     221                 :  * Returns 'true' if the page was split, 'false' otherwise.
     222                 :  */
     223                 : bool
     224 GIC      847063 : gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
     225 ECB             :                 Buffer buffer,
     226                 :                 IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
     227                 :                 BlockNumber *newblkno,
     228                 :                 Buffer leftchildbuf,
     229                 :                 List **splitinfo,
     230                 :                 bool markfollowright,
     231                 :                 Relation heapRel,
     232                 :                 bool is_build)
     233                 : {
     234 GIC      847063 :     BlockNumber blkno = BufferGetBlockNumber(buffer);
     235 CBC      847063 :     Page        page = BufferGetPage(buffer);
     236          847063 :     bool        is_leaf = (GistPageIsLeaf(page)) ? true : false;
     237 ECB             :     XLogRecPtr  recptr;
     238                 :     bool        is_split;
     239                 : 
     240                 :     /*
     241                 :      * Refuse to modify a page that's incompletely split. This should not
     242                 :      * happen because we finish any incomplete splits while we walk down the
     243                 :      * tree. However, it's remotely possible that another concurrent inserter
     244                 :      * splits a parent page, and errors out before completing the split. We
     245                 :      * will just throw an error in that case, and leave any split we had in
     246                 :      * progress unfinished too. The next insert that comes along will clean up
     247                 :      * the mess.
     248                 :      */
     249 CBC      847063 :     if (GistFollowRight(page))
     250 UBC           0 :         elog(ERROR, "concurrent GiST page split was incomplete");
     251                 : 
     252                 :     /* should never try to insert to a deleted page */
     253 CBC      847063 :     Assert(!GistPageIsDeleted(page));
     254                 : 
     255          847063 :     *splitinfo = NIL;
     256                 : 
     257                 :     /*
     258                 :      * if isupdate, remove old key: This node's key has been modified, either
     259                 :      * because a child split occurred or because we needed to adjust our key
     260                 :      * for an insert in a child node. Therefore, remove the old version of
     261                 :      * this node's key.
     262                 :      *
     263                 :      * for WAL replay, in the non-split case we handle this by setting up a
     264                 :      * one-element todelete array; in the split case, it's handled implicitly
     265                 :      * because the tuple vector passed to gistSplit won't include this tuple.
     266                 :      */
     267          847063 :     is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
     268                 : 
     269                 :     /*
     270                 :      * If leaf page is full, try at first to delete dead tuples. And then
     271                 :      * check again.
     272                 :      */
     273          847063 :     if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
     274                 :     {
     275 UBC           0 :         gistprunepage(rel, page, buffer, heapRel);
     276               0 :         is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
     277                 :     }
     278                 : 
     279 CBC      847063 :     if (is_split)
     280                 :     {
     281                 :         /* no space for insertion */
     282                 :         IndexTuple *itvec;
     283                 :         int         tlen;
     284           12298 :         SplitedPageLayout *dist = NULL,
     285                 :                    *ptr;
     286           12298 :         BlockNumber oldrlink = InvalidBlockNumber;
     287           12298 :         GistNSN     oldnsn = 0;
     288                 :         SplitedPageLayout rootpg;
     289                 :         bool        is_rootsplit;
     290                 :         int         npage;
     291                 : 
     292           12298 :         is_rootsplit = (blkno == GIST_ROOT_BLKNO);
     293                 : 
     294                 :         /*
     295                 :          * Form index tuples vector to split. If we're replacing an old tuple,
     296                 :          * remove the old version from the vector.
     297                 :          */
     298           12298 :         itvec = gistextractpage(page, &tlen);
     299           12298 :         if (OffsetNumberIsValid(oldoffnum))
     300                 :         {
     301                 :             /* on inner page we should remove old tuple */
     302            2391 :             int         pos = oldoffnum - FirstOffsetNumber;
     303                 : 
     304            2391 :             tlen--;
     305            2391 :             if (pos != tlen)
     306            1516 :                 memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
     307                 :         }
     308           12298 :         itvec = gistjoinvector(itvec, &tlen, itup, ntup);
     309           12298 :         dist = gistSplit(rel, page, itvec, tlen, giststate);
     310                 : 
     311                 :         /*
     312                 :          * Check that split didn't produce too many pages.
     313                 :          */
     314           12298 :         npage = 0;
     315           36921 :         for (ptr = dist; ptr; ptr = ptr->next)
     316           24623 :             npage++;
     317                 :         /* in a root split, we'll add one more page to the list below */
     318           12298 :         if (is_rootsplit)
     319             192 :             npage++;
     320           12298 :         if (npage > GIST_MAX_SPLIT_PAGES)
     321 UBC           0 :             elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
     322                 :                  npage, GIST_MAX_SPLIT_PAGES);
     323                 : 
     324                 :         /*
     325                 :          * Set up pages to work with. Allocate new buffers for all but the
     326                 :          * leftmost page. The original page becomes the new leftmost page, and
     327                 :          * is just replaced with the new contents.
     328                 :          *
     329                 :          * For a root-split, allocate new buffers for all child pages, the
     330                 :          * original page is overwritten with new root page containing
     331                 :          * downlinks to the new child pages.
     332                 :          */
     333 CBC       12298 :         ptr = dist;
     334           12298 :         if (!is_rootsplit)
     335                 :         {
     336                 :             /* save old rightlink and NSN */
     337           12106 :             oldrlink = GistPageGetOpaque(page)->rightlink;
     338           12106 :             oldnsn = GistPageGetNSN(page);
     339                 : 
     340           12106 :             dist->buffer = buffer;
     341           12106 :             dist->block.blkno = BufferGetBlockNumber(buffer);
     342           12106 :             dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
     343                 : 
     344                 :             /* clean all flags except F_LEAF */
     345           12106 :             GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
     346                 : 
     347           12106 :             ptr = ptr->next;
     348                 :         }
     349           24815 :         for (; ptr; ptr = ptr->next)
     350                 :         {
     351                 :             /* Allocate new page */
     352 GNC       12517 :             ptr->buffer = gistNewBuffer(rel, heapRel);
     353 CBC       12517 :             GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
     354           12517 :             ptr->page = BufferGetPage(ptr->buffer);
     355           12517 :             ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
     356           12517 :             PredicateLockPageSplit(rel,
     357                 :                                    BufferGetBlockNumber(buffer),
     358                 :                                    BufferGetBlockNumber(ptr->buffer));
     359                 :         }
     360                 : 
     361                 :         /*
     362                 :          * Now that we know which blocks the new pages go to, set up downlink
     363                 :          * tuples to point to them.
     364                 :          */
     365           36921 :         for (ptr = dist; ptr; ptr = ptr->next)
     366                 :         {
     367           24623 :             ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
     368           24623 :             GistTupleSetValid(ptr->itup);
     369                 :         }
     370                 : 
     371                 :         /*
     372                 :          * If this is a root split, we construct the new root page with the
     373                 :          * downlinks here directly, instead of requiring the caller to insert
     374                 :          * them. Add the new root page to the list along with the child pages.
     375                 :          */
     376           12298 :         if (is_rootsplit)
     377                 :         {
     378                 :             IndexTuple *downlinks;
     379             192 :             int         ndownlinks = 0;
     380                 :             int         i;
     381                 : 
     382             192 :             rootpg.buffer = buffer;
     383             192 :             rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
     384             192 :             GistPageGetOpaque(rootpg.page)->flags = 0;
     385                 : 
     386                 :             /* Prepare a vector of all the downlinks */
     387             579 :             for (ptr = dist; ptr; ptr = ptr->next)
     388             387 :                 ndownlinks++;
     389             192 :             downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
     390             579 :             for (i = 0, ptr = dist; ptr; ptr = ptr->next)
     391             387 :                 downlinks[i++] = ptr->itup;
     392                 : 
     393             192 :             rootpg.block.blkno = GIST_ROOT_BLKNO;
     394             192 :             rootpg.block.num = ndownlinks;
     395             192 :             rootpg.list = gistfillitupvec(downlinks, ndownlinks,
     396                 :                                           &(rootpg.lenlist));
     397             192 :             rootpg.itup = NULL;
     398                 : 
     399             192 :             rootpg.next = dist;
     400             192 :             dist = &rootpg;
     401                 :         }
     402                 :         else
     403                 :         {
     404                 :             /* Prepare split-info to be returned to caller */
     405           36342 :             for (ptr = dist; ptr; ptr = ptr->next)
     406                 :             {
     407           24236 :                 GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
     408                 : 
     409           24236 :                 si->buf = ptr->buffer;
     410           24236 :                 si->downlink = ptr->itup;
     411           24236 :                 *splitinfo = lappend(*splitinfo, si);
     412                 :             }
     413                 :         }
     414                 : 
     415                 :         /*
     416                 :          * Fill all pages. All the pages are new, ie. freshly allocated empty
     417                 :          * pages, or a temporary copy of the old page.
     418                 :          */
     419           37113 :         for (ptr = dist; ptr; ptr = ptr->next)
     420                 :         {
     421           24815 :             char       *data = (char *) (ptr->list);
     422                 : 
     423 GNC      923771 :             for (int i = 0; i < ptr->block.num; i++)
     424                 :             {
     425 CBC      898956 :                 IndexTuple  thistup = (IndexTuple) data;
     426                 : 
     427          898956 :                 if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
     428 UBC           0 :                     elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
     429                 : 
     430                 :                 /*
     431                 :                  * If this is the first inserted/updated tuple, let the caller
     432                 :                  * know which page it landed on.
     433                 :                  */
     434 CBC      898956 :                 if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
     435             387 :                     *newblkno = ptr->block.blkno;
     436                 : 
     437          898956 :                 data += IndexTupleSize(thistup);
     438                 :             }
     439                 : 
     440                 :             /* Set up rightlinks */
     441           24815 :             if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
     442           24650 :                 GistPageGetOpaque(ptr->page)->rightlink =
     443           12325 :                     ptr->next->block.blkno;
     444                 :             else
     445           12490 :                 GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
     446                 : 
     447                 :             /*
     448                 :              * Mark the all but the right-most page with the follow-right
     449                 :              * flag. It will be cleared as soon as the downlink is inserted
     450                 :              * into the parent, but this ensures that if we error out before
     451                 :              * that, the index is still consistent. (in buffering build mode,
     452                 :              * any error will abort the index build anyway, so this is not
     453                 :              * needed.)
     454                 :              */
     455           24815 :             if (ptr->next && !is_rootsplit && markfollowright)
     456           11746 :                 GistMarkFollowRight(ptr->page);
     457                 :             else
     458           13069 :                 GistClearFollowRight(ptr->page);
     459                 : 
     460                 :             /*
     461                 :              * Copy the NSN of the original page to all pages. The
     462                 :              * F_FOLLOW_RIGHT flags ensure that scans will follow the
     463                 :              * rightlinks until the downlinks are inserted.
     464                 :              */
     465           24815 :             GistPageSetNSN(ptr->page, oldnsn);
     466                 :         }
     467                 : 
     468                 :         /*
     469                 :          * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
     470                 :          * insertion for that. NB: The number of pages and data segments
     471                 :          * specified here must match the calculations in gistXLogSplit()!
     472                 :          */
     473           12298 :         if (!is_build && RelationNeedsWAL(rel))
     474            1730 :             XLogEnsureRecordSpace(npage, 1 + npage * 2);
     475                 : 
     476           12298 :         START_CRIT_SECTION();
     477                 : 
     478                 :         /*
     479                 :          * Must mark buffers dirty before XLogInsert, even though we'll still
     480                 :          * be changing their opaque fields below.
     481                 :          */
     482           37113 :         for (ptr = dist; ptr; ptr = ptr->next)
     483           24815 :             MarkBufferDirty(ptr->buffer);
     484           12298 :         if (BufferIsValid(leftchildbuf))
     485            2364 :             MarkBufferDirty(leftchildbuf);
     486                 : 
     487                 :         /*
     488                 :          * The first page in the chain was a temporary working copy meant to
     489                 :          * replace the old page. Copy it over the old page.
     490                 :          */
     491           12298 :         PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
     492           12298 :         dist->page = BufferGetPage(dist->buffer);
     493                 : 
     494                 :         /*
     495                 :          * Write the WAL record.
     496                 :          *
     497                 :          * If we're building a new index, however, we don't WAL-log changes
     498                 :          * yet. The LSN-NSN interlock between parent and child requires that
     499                 :          * LSNs never move backwards, so set the LSNs to a value that's
     500                 :          * smaller than any real or fake unlogged LSN that might be generated
     501                 :          * later. (There can't be any concurrent scans during index build, so
     502                 :          * we don't need to be able to detect concurrent splits yet.)
     503                 :          */
     504           12298 :         if (is_build)
     505           10565 :             recptr = GistBuildLSN;
     506                 :         else
     507                 :         {
     508            1733 :             if (RelationNeedsWAL(rel))
     509            1730 :                 recptr = gistXLogSplit(is_leaf,
     510                 :                                        dist, oldrlink, oldnsn, leftchildbuf,
     511                 :                                        markfollowright);
     512                 :             else
     513               3 :                 recptr = gistGetFakeLSN(rel);
     514                 :         }
     515                 : 
     516           37113 :         for (ptr = dist; ptr; ptr = ptr->next)
     517           24815 :             PageSetLSN(ptr->page, recptr);
     518                 : 
     519                 :         /*
     520                 :          * Return the new child buffers to the caller.
     521                 :          *
     522                 :          * If this was a root split, we've already inserted the downlink
     523                 :          * pointers, in the form of a new root page. Therefore we can release
     524                 :          * all the new buffers, and keep just the root page locked.
     525                 :          */
     526           12298 :         if (is_rootsplit)
     527                 :         {
     528             579 :             for (ptr = dist->next; ptr; ptr = ptr->next)
     529             387 :                 UnlockReleaseBuffer(ptr->buffer);
     530                 :         }
     531                 :     }
     532                 :     else
     533                 :     {
     534                 :         /*
     535                 :          * Enough space.  We always get here if ntup==0.
     536                 :          */
     537          834765 :         START_CRIT_SECTION();
     538                 : 
     539                 :         /*
     540                 :          * Delete old tuple if any, then insert new tuple(s) if any.  If
     541                 :          * possible, use the fast path of PageIndexTupleOverwrite.
     542                 :          */
     543          834765 :         if (OffsetNumberIsValid(oldoffnum))
     544                 :         {
     545          371373 :             if (ntup == 1)
     546                 :             {
     547                 :                 /* One-for-one replacement, so use PageIndexTupleOverwrite */
     548          361640 :                 if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
     549          361640 :                                              IndexTupleSize(*itup)))
     550 UBC           0 :                     elog(ERROR, "failed to add item to index page in \"%s\"",
     551                 :                          RelationGetRelationName(rel));
     552                 :             }
     553                 :             else
     554                 :             {
     555                 :                 /* Delete old, then append new tuple(s) to page */
     556 CBC        9733 :                 PageIndexTupleDelete(page, oldoffnum);
     557            9733 :                 gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
     558                 :             }
     559                 :         }
     560                 :         else
     561                 :         {
     562                 :             /* Just append new tuples at the end of the page */
     563          463392 :             gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
     564                 :         }
     565                 : 
     566          834765 :         MarkBufferDirty(buffer);
     567                 : 
     568          834765 :         if (BufferIsValid(leftchildbuf))
     569            9382 :             MarkBufferDirty(leftchildbuf);
     570                 : 
     571          834765 :         if (is_build)
     572          588316 :             recptr = GistBuildLSN;
     573                 :         else
     574                 :         {
     575          246449 :             if (RelationNeedsWAL(rel))
     576          246410 :             {
     577          246410 :                 OffsetNumber ndeloffs = 0,
     578                 :                             deloffs[1];
     579                 : 
     580          246410 :                 if (OffsetNumberIsValid(oldoffnum))
     581                 :                 {
     582           96869 :                     deloffs[0] = oldoffnum;
     583           96869 :                     ndeloffs = 1;
     584                 :                 }
     585                 : 
     586          246410 :                 recptr = gistXLogUpdate(buffer,
     587                 :                                         deloffs, ndeloffs, itup, ntup,
     588                 :                                         leftchildbuf);
     589                 :             }
     590                 :             else
     591              39 :                 recptr = gistGetFakeLSN(rel);
     592                 :         }
     593          834765 :         PageSetLSN(page, recptr);
     594                 : 
     595          834765 :         if (newblkno)
     596           52185 :             *newblkno = blkno;
     597                 :     }
     598                 : 
     599                 :     /*
     600                 :      * If we inserted the downlink for a child page, set NSN and clear
     601                 :      * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
     602                 :      * follow the rightlink if and only if they looked at the parent page
     603                 :      * before we inserted the downlink.
     604                 :      *
     605                 :      * Note that we do this *after* writing the WAL record. That means that
     606                 :      * the possible full page image in the WAL record does not include these
     607                 :      * changes, and they must be replayed even if the page is restored from
     608                 :      * the full page image. There's a chicken-and-egg problem: if we updated
     609                 :      * the child pages first, we wouldn't know the recptr of the WAL record
     610                 :      * we're about to write.
     611                 :      */
     612          847063 :     if (BufferIsValid(leftchildbuf))
     613                 :     {
     614           11746 :         Page        leftpg = BufferGetPage(leftchildbuf);
     615                 : 
     616           11746 :         GistPageSetNSN(leftpg, recptr);
     617           11746 :         GistClearFollowRight(leftpg);
     618                 : 
     619           11746 :         PageSetLSN(leftpg, recptr);
     620                 :     }
     621                 : 
     622          847063 :     END_CRIT_SECTION();
     623                 : 
     624          847063 :     return is_split;
     625                 : }
     626                 : 
     627                 : /*
     628                 :  * Workhouse routine for doing insertion into a GiST index. Note that
     629                 :  * this routine assumes it is invoked in a short-lived memory context,
     630                 :  * so it does not bother releasing palloc'd allocations.
     631                 :  */
     632                 : void
     633          455566 : gistdoinsert(Relation r, IndexTuple itup, Size freespace,
     634                 :              GISTSTATE *giststate, Relation heapRel, bool is_build)
     635                 : {
     636                 :     ItemId      iid;
     637                 :     IndexTuple  idxtuple;
     638                 :     GISTInsertStack firststack;
     639                 :     GISTInsertStack *stack;
     640                 :     GISTInsertState state;
     641          455566 :     bool        xlocked = false;
     642                 : 
     643          455566 :     memset(&state, 0, sizeof(GISTInsertState));
     644          455566 :     state.freespace = freespace;
     645          455566 :     state.r = r;
     646          455566 :     state.heapRel = heapRel;
     647          455566 :     state.is_build = is_build;
     648                 : 
     649                 :     /* Start from the root */
     650          455566 :     firststack.blkno = GIST_ROOT_BLKNO;
     651          455566 :     firststack.lsn = 0;
     652          455566 :     firststack.retry_from_parent = false;
     653          455566 :     firststack.parent = NULL;
     654          455566 :     firststack.downlinkoffnum = InvalidOffsetNumber;
     655          455566 :     state.stack = stack = &firststack;
     656                 : 
     657                 :     /*
     658                 :      * Walk down along the path of smallest penalty, updating the parent
     659                 :      * pointers with the key we're inserting as we go. If we crash in the
     660                 :      * middle, the tree is consistent, although the possible parent updates
     661                 :      * were a waste.
     662                 :      */
     663                 :     for (;;)
     664                 :     {
     665                 :         /*
     666                 :          * If we split an internal page while descending the tree, we have to
     667                 :          * retry at the parent. (Normally, the LSN-NSN interlock below would
     668                 :          * also catch this and cause us to retry. But LSNs are not updated
     669                 :          * during index build.)
     670                 :          */
     671         1007742 :         while (stack->retry_from_parent)
     672                 :         {
     673 UBC           0 :             if (xlocked)
     674               0 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
     675               0 :             xlocked = false;
     676               0 :             ReleaseBuffer(stack->buffer);
     677               0 :             state.stack = stack = stack->parent;
     678                 :         }
     679                 : 
     680 CBC     1007742 :         if (XLogRecPtrIsInvalid(stack->lsn))
     681         1007724 :             stack->buffer = ReadBuffer(state.r, stack->blkno);
     682                 : 
     683                 :         /*
     684                 :          * Be optimistic and grab shared lock first. Swap it for an exclusive
     685                 :          * lock later if we need to update the page.
     686                 :          */
     687         1007742 :         if (!xlocked)
     688                 :         {
     689         1007742 :             LockBuffer(stack->buffer, GIST_SHARE);
     690         1007742 :             gistcheckpage(state.r, stack->buffer);
     691                 :         }
     692                 : 
     693         1007742 :         stack->page = (Page) BufferGetPage(stack->buffer);
     694         1007742 :         stack->lsn = xlocked ?
     695         1007742 :             PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
     696         1007742 :         Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));
     697                 : 
     698                 :         /*
     699                 :          * If this page was split but the downlink was never inserted to the
     700                 :          * parent because the inserting backend crashed before doing that, fix
     701                 :          * that now.
     702                 :          */
     703         1007742 :         if (GistFollowRight(stack->page))
     704                 :         {
     705 UBC           0 :             if (!xlocked)
     706                 :             {
     707               0 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
     708               0 :                 LockBuffer(stack->buffer, GIST_EXCLUSIVE);
     709               0 :                 xlocked = true;
     710                 :                 /* someone might've completed the split when we unlocked */
     711               0 :                 if (!GistFollowRight(stack->page))
     712               0 :                     continue;
     713                 :             }
     714               0 :             gistfixsplit(&state, giststate);
     715                 : 
     716               0 :             UnlockReleaseBuffer(stack->buffer);
     717               0 :             xlocked = false;
     718               0 :             state.stack = stack = stack->parent;
     719               0 :             continue;
     720                 :         }
     721                 : 
     722 CBC     1559900 :         if ((stack->blkno != GIST_ROOT_BLKNO &&
     723          552158 :              stack->parent->lsn < GistPageGetNSN(stack->page)) ||
     724         1007742 :             GistPageIsDeleted(stack->page))
     725                 :         {
     726                 :             /*
     727                 :              * Concurrent split or page deletion detected. There's no
     728                 :              * guarantee that the downlink for this page is consistent with
     729                 :              * the tuple we're inserting anymore, so go back to parent and
     730                 :              * rechoose the best child.
     731                 :              */
     732 UBC           0 :             UnlockReleaseBuffer(stack->buffer);
     733               0 :             xlocked = false;
     734               0 :             state.stack = stack = stack->parent;
     735               0 :             continue;
     736                 :         }
     737                 : 
     738 CBC     1007742 :         if (!GistPageIsLeaf(stack->page))
     739                 :         {
     740                 :             /*
     741                 :              * This is an internal page so continue to walk down the tree.
     742                 :              * Find the child node that has the minimum insertion penalty.
     743                 :              */
     744                 :             BlockNumber childblkno;
     745                 :             IndexTuple  newtup;
     746                 :             GISTInsertStack *item;
     747                 :             OffsetNumber downlinkoffnum;
     748                 : 
     749          552176 :             downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
     750          552176 :             iid = PageGetItemId(stack->page, downlinkoffnum);
     751          552176 :             idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
     752          552176 :             childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     753                 : 
     754                 :             /*
     755                 :              * Check that it's not a leftover invalid tuple from pre-9.1
     756                 :              */
     757          552176 :             if (GistTupleIsInvalid(idxtuple))
     758 UBC           0 :                 ereport(ERROR,
     759                 :                         (errmsg("index \"%s\" contains an inner tuple marked as invalid",
     760                 :                                 RelationGetRelationName(r)),
     761                 :                          errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
     762                 :                          errhint("Please REINDEX it.")));
     763                 : 
     764                 :             /*
     765                 :              * Check that the key representing the target child node is
     766                 :              * consistent with the key we're inserting. Update it if it's not.
     767                 :              */
     768 CBC      552176 :             newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
     769          552176 :             if (newtup)
     770                 :             {
     771                 :                 /*
     772                 :                  * Swap shared lock for an exclusive one. Beware, the page may
     773                 :                  * change while we unlock/lock the page...
     774                 :                  */
     775          327185 :                 if (!xlocked)
     776                 :                 {
     777          327185 :                     LockBuffer(stack->buffer, GIST_UNLOCK);
     778          327185 :                     LockBuffer(stack->buffer, GIST_EXCLUSIVE);
     779          327185 :                     xlocked = true;
     780          327185 :                     stack->page = (Page) BufferGetPage(stack->buffer);
     781                 : 
     782          327185 :                     if (PageGetLSN(stack->page) != stack->lsn)
     783                 :                     {
     784                 :                         /* the page was changed while we unlocked it, retry */
     785 UBC           0 :                         continue;
     786                 :                     }
     787                 :                 }
     788                 : 
     789                 :                 /*
     790                 :                  * Update the tuple.
     791                 :                  *
     792                 :                  * We still hold the lock after gistinserttuple(), but it
     793                 :                  * might have to split the page to make the updated tuple fit.
     794                 :                  * In that case the updated tuple might migrate to the other
     795                 :                  * half of the split, so we have to go back to the parent and
     796                 :                  * descend back to the half that's a better fit for the new
     797                 :                  * tuple.
     798                 :                  */
     799 CBC      327185 :                 if (gistinserttuple(&state, stack, giststate, newtup,
     800                 :                                     downlinkoffnum))
     801                 :                 {
     802                 :                     /*
     803                 :                      * If this was a root split, the root page continues to be
     804                 :                      * the parent and the updated tuple went to one of the
     805                 :                      * child pages, so we just need to retry from the root
     806                 :                      * page.
     807                 :                      */
     808              18 :                     if (stack->blkno != GIST_ROOT_BLKNO)
     809                 :                     {
     810              18 :                         UnlockReleaseBuffer(stack->buffer);
     811              18 :                         xlocked = false;
     812              18 :                         state.stack = stack = stack->parent;
     813                 :                     }
     814              18 :                     continue;
     815                 :                 }
     816                 :             }
     817          552158 :             LockBuffer(stack->buffer, GIST_UNLOCK);
     818          552158 :             xlocked = false;
     819                 : 
     820                 :             /* descend to the chosen child */
     821          552158 :             item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
     822          552158 :             item->blkno = childblkno;
     823          552158 :             item->parent = stack;
     824          552158 :             item->downlinkoffnum = downlinkoffnum;
     825          552158 :             state.stack = stack = item;
     826                 :         }
     827                 :         else
     828                 :         {
     829                 :             /*
     830                 :              * Leaf page. Insert the new key. We've already updated all the
     831                 :              * parents on the way down, but we might have to split the page if
     832                 :              * it doesn't fit. gistinserttuple() will take care of that.
     833                 :              */
     834                 : 
     835                 :             /*
     836                 :              * Swap shared lock for an exclusive one. Be careful, the page may
     837                 :              * change while we unlock/lock the page...
     838                 :              */
     839          455566 :             if (!xlocked)
     840                 :             {
     841          455566 :                 LockBuffer(stack->buffer, GIST_UNLOCK);
     842          455566 :                 LockBuffer(stack->buffer, GIST_EXCLUSIVE);
     843          455566 :                 xlocked = true;
     844          455566 :                 stack->page = (Page) BufferGetPage(stack->buffer);
     845          455566 :                 stack->lsn = PageGetLSN(stack->page);
     846                 : 
     847          455566 :                 if (stack->blkno == GIST_ROOT_BLKNO)
     848                 :                 {
     849                 :                     /*
     850                 :                      * the only page that can become inner instead of leaf is
     851                 :                      * the root page, so for root we should recheck it
     852                 :                      */
     853           25879 :                     if (!GistPageIsLeaf(stack->page))
     854                 :                     {
     855                 :                         /*
     856                 :                          * very rare situation: during unlock/lock index with
     857                 :                          * number of pages = 1 was increased
     858                 :                          */
     859 UBC           0 :                         LockBuffer(stack->buffer, GIST_UNLOCK);
     860               0 :                         xlocked = false;
     861               0 :                         continue;
     862                 :                     }
     863                 : 
     864                 :                     /*
     865                 :                      * we don't need to check root split, because checking
     866                 :                      * leaf/inner is enough to recognize split for root
     867                 :                      */
     868                 :                 }
     869 CBC      859374 :                 else if ((GistFollowRight(stack->page) ||
     870          429687 :                           stack->parent->lsn < GistPageGetNSN(stack->page)) ||
     871          429687 :                          GistPageIsDeleted(stack->page))
     872                 :                 {
     873                 :                     /*
     874                 :                      * The page was split or deleted while we momentarily
     875                 :                      * unlocked the page. Go back to parent.
     876                 :                      */
     877 UBC           0 :                     UnlockReleaseBuffer(stack->buffer);
     878               0 :                     xlocked = false;
     879               0 :                     state.stack = stack = stack->parent;
     880               0 :                     continue;
     881                 :                 }
     882                 :             }
     883                 : 
     884                 :             /* now state.stack->(page, buffer and blkno) points to leaf page */
     885                 : 
     886 CBC      455566 :             gistinserttuple(&state, stack, giststate, itup,
     887                 :                             InvalidOffsetNumber);
     888          455560 :             LockBuffer(stack->buffer, GIST_UNLOCK);
     889                 : 
     890                 :             /* Release any pins we might still hold before exiting */
     891         1463254 :             for (; stack; stack = stack->parent)
     892         1007694 :                 ReleaseBuffer(stack->buffer);
     893          455560 :             break;
     894                 :         }
     895                 :     }
     896          455560 : }
     897                 : 
     898                 : /*
     899                 :  * Traverse the tree to find path from root page to specified "child" block.
     900                 :  *
     901                 :  * returns a new insertion stack, starting from the parent of "child", up
     902                 :  * to the root. *downlinkoffnum is set to the offset of the downlink in the
     903                 :  * direct parent of child.
     904                 :  *
     905                 :  * To prevent deadlocks, this should lock only one page at a time.
     906                 :  */
     907                 : static GISTInsertStack *
     908 UBC           0 : gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
     909                 : {
     910                 :     Page        page;
     911                 :     Buffer      buffer;
     912                 :     OffsetNumber i,
     913                 :                 maxoff;
     914                 :     ItemId      iid;
     915                 :     IndexTuple  idxtuple;
     916                 :     List       *fifo;
     917                 :     GISTInsertStack *top,
     918                 :                *ptr;
     919                 :     BlockNumber blkno;
     920                 : 
     921               0 :     top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
     922               0 :     top->blkno = GIST_ROOT_BLKNO;
     923               0 :     top->downlinkoffnum = InvalidOffsetNumber;
     924                 : 
     925               0 :     fifo = list_make1(top);
     926               0 :     while (fifo != NIL)
     927                 :     {
     928                 :         /* Get next page to visit */
     929               0 :         top = linitial(fifo);
     930               0 :         fifo = list_delete_first(fifo);
     931                 : 
     932               0 :         buffer = ReadBuffer(r, top->blkno);
     933               0 :         LockBuffer(buffer, GIST_SHARE);
     934               0 :         gistcheckpage(r, buffer);
     935               0 :         page = (Page) BufferGetPage(buffer);
     936                 : 
     937               0 :         if (GistPageIsLeaf(page))
     938                 :         {
     939                 :             /*
     940                 :              * Because we scan the index top-down, all the rest of the pages
     941                 :              * in the queue must be leaf pages as well.
     942                 :              */
     943               0 :             UnlockReleaseBuffer(buffer);
     944               0 :             break;
     945                 :         }
     946                 : 
     947                 :         /* currently, internal pages are never deleted */
     948               0 :         Assert(!GistPageIsDeleted(page));
     949                 : 
     950               0 :         top->lsn = BufferGetLSNAtomic(buffer);
     951                 : 
     952                 :         /*
     953                 :          * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
     954                 :          * downlink. This should not normally happen..
     955                 :          */
     956               0 :         if (GistFollowRight(page))
     957               0 :             elog(ERROR, "concurrent GiST page split was incomplete");
     958                 : 
     959               0 :         if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
     960               0 :             GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
     961                 :         {
     962                 :             /*
     963                 :              * Page was split while we looked elsewhere. We didn't see the
     964                 :              * downlink to the right page when we scanned the parent, so add
     965                 :              * it to the queue now.
     966                 :              *
     967                 :              * Put the right page ahead of the queue, so that we visit it
     968                 :              * next. That's important, because if this is the lowest internal
     969                 :              * level, just above leaves, we might already have queued up some
     970                 :              * leaf pages, and we assume that there can't be any non-leaf
     971                 :              * pages behind leaf pages.
     972                 :              */
     973               0 :             ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
     974               0 :             ptr->blkno = GistPageGetOpaque(page)->rightlink;
     975               0 :             ptr->downlinkoffnum = InvalidOffsetNumber;
     976               0 :             ptr->parent = top->parent;
     977                 : 
     978               0 :             fifo = lcons(ptr, fifo);
     979                 :         }
     980                 : 
     981               0 :         maxoff = PageGetMaxOffsetNumber(page);
     982                 : 
     983               0 :         for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
     984                 :         {
     985               0 :             iid = PageGetItemId(page, i);
     986               0 :             idxtuple = (IndexTuple) PageGetItem(page, iid);
     987               0 :             blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
     988               0 :             if (blkno == child)
     989                 :             {
     990                 :                 /* Found it! */
     991               0 :                 UnlockReleaseBuffer(buffer);
     992               0 :                 *downlinkoffnum = i;
     993               0 :                 return top;
     994                 :             }
     995                 :             else
     996                 :             {
     997                 :                 /* Append this child to the list of pages to visit later */
     998               0 :                 ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
     999               0 :                 ptr->blkno = blkno;
    1000               0 :                 ptr->downlinkoffnum = i;
    1001               0 :                 ptr->parent = top;
    1002                 : 
    1003               0 :                 fifo = lappend(fifo, ptr);
    1004                 :             }
    1005                 :         }
    1006                 : 
    1007               0 :         UnlockReleaseBuffer(buffer);
    1008                 :     }
    1009                 : 
    1010               0 :     elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
    1011                 :          RelationGetRelationName(r), child);
    1012                 :     return NULL;                /* keep compiler quiet */
    1013                 : }
    1014                 : 
    1015                 : /*
    1016                 :  * Updates the stack so that child->parent is the correct parent of the
    1017                 :  * child. child->parent must be exclusively locked on entry, and will
    1018                 :  * remain so at exit, but it might not be the same page anymore.
    1019                 :  */
    1020                 : static void
    1021 CBC       11746 : gistFindCorrectParent(Relation r, GISTInsertStack *child)
    1022                 : {
    1023           11746 :     GISTInsertStack *parent = child->parent;
    1024                 : 
    1025           11746 :     gistcheckpage(r, parent->buffer);
    1026           11746 :     parent->page = (Page) BufferGetPage(parent->buffer);
    1027                 : 
    1028                 :     /* here we don't need to distinguish between split and page update */
    1029           23492 :     if (child->downlinkoffnum == InvalidOffsetNumber ||
    1030           11746 :         parent->lsn != PageGetLSN(parent->page))
    1031                 :     {
    1032                 :         /* parent is changed, look child in right links until found */
    1033                 :         OffsetNumber i,
    1034                 :                     maxoff;
    1035                 :         ItemId      iid;
    1036                 :         IndexTuple  idxtuple;
    1037                 :         GISTInsertStack *ptr;
    1038                 : 
    1039                 :         while (true)
    1040                 :         {
    1041            1587 :             maxoff = PageGetMaxOffsetNumber(parent->page);
    1042           74464 :             for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    1043                 :             {
    1044           74464 :                 iid = PageGetItemId(parent->page, i);
    1045           74464 :                 idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
    1046           74464 :                 if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
    1047                 :                 {
    1048                 :                     /* yes!!, found */
    1049            1587 :                     child->downlinkoffnum = i;
    1050            1587 :                     return;
    1051                 :                 }
    1052                 :             }
    1053                 : 
    1054 UBC           0 :             parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
    1055               0 :             UnlockReleaseBuffer(parent->buffer);
    1056               0 :             if (parent->blkno == InvalidBlockNumber)
    1057                 :             {
    1058                 :                 /*
    1059                 :                  * End of chain and still didn't find parent. It's a very-very
    1060                 :                  * rare situation when root splitted.
    1061                 :                  */
    1062               0 :                 break;
    1063                 :             }
    1064               0 :             parent->buffer = ReadBuffer(r, parent->blkno);
    1065               0 :             LockBuffer(parent->buffer, GIST_EXCLUSIVE);
    1066               0 :             gistcheckpage(r, parent->buffer);
    1067               0 :             parent->page = (Page) BufferGetPage(parent->buffer);
    1068                 :         }
    1069                 : 
    1070                 :         /*
    1071                 :          * awful!!, we need search tree to find parent ... , but before we
    1072                 :          * should release all old parent
    1073                 :          */
    1074                 : 
    1075               0 :         ptr = child->parent->parent;  /* child->parent already released
    1076                 :                                          * above */
    1077               0 :         while (ptr)
    1078                 :         {
    1079               0 :             ReleaseBuffer(ptr->buffer);
    1080               0 :             ptr = ptr->parent;
    1081                 :         }
    1082                 : 
    1083                 :         /* ok, find new path */
    1084               0 :         ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
    1085                 : 
    1086                 :         /* read all buffers as expected by caller */
    1087                 :         /* note we don't lock them or gistcheckpage them here! */
    1088               0 :         while (ptr)
    1089                 :         {
    1090               0 :             ptr->buffer = ReadBuffer(r, ptr->blkno);
    1091               0 :             ptr->page = (Page) BufferGetPage(ptr->buffer);
    1092               0 :             ptr = ptr->parent;
    1093                 :         }
    1094                 : 
    1095                 :         /* install new chain of parents to stack */
    1096               0 :         child->parent = parent;
    1097                 : 
    1098                 :         /* make recursive call to normal processing */
    1099               0 :         LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
    1100               0 :         gistFindCorrectParent(r, child);
    1101                 :     }
    1102                 : }
    1103                 : 
    1104                 : /*
    1105                 :  * Form a downlink pointer for the page in 'buf'.
    1106                 :  */
    1107                 : static IndexTuple
    1108               0 : gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
    1109                 :                  GISTInsertStack *stack)
    1110                 : {
    1111               0 :     Page        page = BufferGetPage(buf);
    1112                 :     OffsetNumber maxoff;
    1113                 :     OffsetNumber offset;
    1114               0 :     IndexTuple  downlink = NULL;
    1115                 : 
    1116               0 :     maxoff = PageGetMaxOffsetNumber(page);
    1117               0 :     for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
    1118                 :     {
    1119                 :         IndexTuple  ituple = (IndexTuple)
    1120               0 :         PageGetItem(page, PageGetItemId(page, offset));
    1121                 : 
    1122               0 :         if (downlink == NULL)
    1123               0 :             downlink = CopyIndexTuple(ituple);
    1124                 :         else
    1125                 :         {
    1126                 :             IndexTuple  newdownlink;
    1127                 : 
    1128               0 :             newdownlink = gistgetadjusted(rel, downlink, ituple,
    1129                 :                                           giststate);
    1130               0 :             if (newdownlink)
    1131               0 :                 downlink = newdownlink;
    1132                 :         }
    1133                 :     }
    1134                 : 
    1135                 :     /*
    1136                 :      * If the page is completely empty, we can't form a meaningful downlink
    1137                 :      * for it. But we have to insert a downlink for the page. Any key will do,
    1138                 :      * as long as its consistent with the downlink of parent page, so that we
    1139                 :      * can legally insert it to the parent. A minimal one that matches as few
    1140                 :      * scans as possible would be best, to keep scans from doing useless work,
    1141                 :      * but we don't know how to construct that. So we just use the downlink of
    1142                 :      * the original page that was split - that's as far from optimal as it can
    1143                 :      * get but will do..
    1144                 :      */
    1145               0 :     if (!downlink)
    1146                 :     {
    1147                 :         ItemId      iid;
    1148                 : 
    1149               0 :         LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
    1150               0 :         gistFindCorrectParent(rel, stack);
    1151               0 :         iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
    1152               0 :         downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
    1153               0 :         downlink = CopyIndexTuple(downlink);
    1154               0 :         LockBuffer(stack->parent->buffer, GIST_UNLOCK);
    1155                 :     }
    1156                 : 
    1157               0 :     ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
    1158               0 :     GistTupleSetValid(downlink);
    1159                 : 
    1160               0 :     return downlink;
    1161                 : }
    1162                 : 
    1163                 : 
    1164                 : /*
    1165                 :  * Complete the incomplete split of state->stack->page.
    1166                 :  */
    1167                 : static void
    1168               0 : gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
    1169                 : {
    1170               0 :     GISTInsertStack *stack = state->stack;
    1171                 :     Buffer      buf;
    1172                 :     Page        page;
    1173               0 :     List       *splitinfo = NIL;
    1174                 : 
    1175               0 :     ereport(LOG,
    1176                 :             (errmsg("fixing incomplete split in index \"%s\", block %u",
    1177                 :                     RelationGetRelationName(state->r), stack->blkno)));
    1178                 : 
    1179               0 :     Assert(GistFollowRight(stack->page));
    1180               0 :     Assert(OffsetNumberIsValid(stack->downlinkoffnum));
    1181                 : 
    1182               0 :     buf = stack->buffer;
    1183                 : 
    1184                 :     /*
    1185                 :      * Read the chain of split pages, following the rightlinks. Construct a
    1186                 :      * downlink tuple for each page.
    1187                 :      */
    1188                 :     for (;;)
    1189               0 :     {
    1190               0 :         GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
    1191                 :         IndexTuple  downlink;
    1192                 : 
    1193               0 :         page = BufferGetPage(buf);
    1194                 : 
    1195                 :         /* Form the new downlink tuples to insert to parent */
    1196               0 :         downlink = gistformdownlink(state->r, buf, giststate, stack);
    1197                 : 
    1198               0 :         si->buf = buf;
    1199               0 :         si->downlink = downlink;
    1200                 : 
    1201               0 :         splitinfo = lappend(splitinfo, si);
    1202                 : 
    1203               0 :         if (GistFollowRight(page))
    1204                 :         {
    1205                 :             /* lock next page */
    1206               0 :             buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
    1207               0 :             LockBuffer(buf, GIST_EXCLUSIVE);
    1208                 :         }
    1209                 :         else
    1210               0 :             break;
    1211                 :     }
    1212                 : 
    1213                 :     /* Insert the downlinks */
    1214               0 :     gistfinishsplit(state, stack, giststate, splitinfo, false);
    1215               0 : }
    1216                 : 
    1217                 : /*
    1218                 :  * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
    1219                 :  * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
    1220                 :  * 'stack' represents the path from the root to the page being updated.
    1221                 :  *
    1222                 :  * The caller must hold an exclusive lock on stack->buffer.  The lock is still
    1223                 :  * held on return, but the page might not contain the inserted tuple if the
    1224                 :  * page was split. The function returns true if the page was split, false
    1225                 :  * otherwise.
    1226                 :  */
    1227                 : static bool
    1228 CBC      782751 : gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
    1229                 :                 GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
    1230                 : {
    1231          782751 :     return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
    1232                 :                             InvalidBuffer, InvalidBuffer, false, false);
    1233                 : }
    1234                 : 
    1235                 : /* ----------------
    1236                 :  * An extended workhorse version of gistinserttuple(). This version allows
    1237                 :  * inserting multiple tuples, or replacing a single tuple with multiple tuples.
    1238                 :  * This is used to recursively update the downlinks in the parent when a page
    1239                 :  * is split.
    1240                 :  *
    1241                 :  * If leftchild and rightchild are valid, we're inserting/replacing the
    1242                 :  * downlink for rightchild, and leftchild is its left sibling. We clear the
    1243                 :  * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
    1244                 :  * insertion of the downlink.
    1245                 :  *
    1246                 :  * To avoid holding locks for longer than necessary, when recursing up the
    1247                 :  * tree to update the parents, the locking is a bit peculiar here. On entry,
    1248                 :  * the caller must hold an exclusive lock on stack->buffer, as well as
    1249                 :  * leftchild and rightchild if given. On return:
    1250                 :  *
    1251                 :  *  - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
    1252                 :  *    always kept pinned, however.
    1253                 :  *  - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
    1254                 :  *    is kept pinned.
    1255                 :  *  - Lock and pin on 'rightchild' are always released.
    1256                 :  *
    1257                 :  * Returns 'true' if the page had to be split. Note that if the page was
    1258                 :  * split, the inserted/updated tuples might've been inserted to a right
    1259                 :  * sibling of stack->buffer instead of stack->buffer itself.
    1260                 :  */
    1261                 : static bool
    1262          794497 : gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
    1263                 :                  GISTSTATE *giststate,
    1264                 :                  IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
    1265                 :                  Buffer leftchild, Buffer rightchild,
    1266                 :                  bool unlockbuf, bool unlockleftchild)
    1267                 : {
    1268                 :     List       *splitinfo;
    1269                 :     bool        is_split;
    1270                 : 
    1271                 :     /*
    1272                 :      * Check for any rw conflicts (in serializable isolation level) just
    1273                 :      * before we intend to modify the page
    1274                 :      */
    1275          794497 :     CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));
    1276                 : 
    1277                 :     /* Insert the tuple(s) to the page, splitting the page if necessary */
    1278          794491 :     is_split = gistplacetopage(state->r, state->freespace, giststate,
    1279                 :                                stack->buffer,
    1280                 :                                tuples, ntup,
    1281                 :                                oldoffnum, NULL,
    1282                 :                                leftchild,
    1283                 :                                &splitinfo,
    1284                 :                                true,
    1285                 :                                state->heapRel,
    1286          794491 :                                state->is_build);
    1287                 : 
    1288                 :     /*
    1289                 :      * Before recursing up in case the page was split, release locks on the
    1290                 :      * child pages. We don't need to keep them locked when updating the
    1291                 :      * parent.
    1292                 :      */
    1293          794491 :     if (BufferIsValid(rightchild))
    1294           11746 :         UnlockReleaseBuffer(rightchild);
    1295          794491 :     if (BufferIsValid(leftchild) && unlockleftchild)
    1296            2317 :         LockBuffer(leftchild, GIST_UNLOCK);
    1297                 : 
    1298                 :     /*
    1299                 :      * If we had to split, insert/update the downlinks in the parent. If the
    1300                 :      * caller requested us to release the lock on stack->buffer, tell
    1301                 :      * gistfinishsplit() to do that as soon as it's safe to do so. If we
    1302                 :      * didn't have to split, release it ourselves.
    1303                 :      */
    1304          794491 :     if (splitinfo)
    1305           11722 :         gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
    1306          782769 :     else if (unlockbuf)
    1307            9405 :         LockBuffer(stack->buffer, GIST_UNLOCK);
    1308                 : 
    1309          794491 :     return is_split;
    1310                 : }
    1311                 : 
    1312                 : /*
    1313                 :  * Finish an incomplete split by inserting/updating the downlinks in parent
    1314                 :  * page. 'splitinfo' contains all the child pages involved in the split,
    1315                 :  * from left-to-right.
    1316                 :  *
    1317                 :  * On entry, the caller must hold a lock on stack->buffer and all the child
    1318                 :  * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
    1319                 :  * released on return. The child pages are always unlocked and unpinned.
    1320                 :  */
    1321                 : static void
    1322           11722 : gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
    1323                 :                 GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
    1324                 : {
    1325                 :     GISTPageSplitInfo *right;
    1326                 :     GISTPageSplitInfo *left;
    1327                 :     IndexTuple  tuples[2];
    1328                 : 
    1329                 :     /* A split always contains at least two halves */
    1330           11722 :     Assert(list_length(splitinfo) >= 2);
    1331                 : 
    1332                 :     /*
    1333                 :      * We need to insert downlinks for each new page, and update the downlink
    1334                 :      * for the original (leftmost) page in the split. Begin at the rightmost
    1335                 :      * page, inserting one downlink at a time until there's only two pages
    1336                 :      * left. Finally insert the downlink for the last new page and update the
    1337                 :      * downlink for the original page as one operation.
    1338                 :      */
    1339           11722 :     LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
    1340                 : 
    1341                 :     /*
    1342                 :      * Insert downlinks for the siblings from right to left, until there are
    1343                 :      * only two siblings left.
    1344                 :      */
    1345           11746 :     for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
    1346                 :     {
    1347              24 :         right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
    1348              24 :         left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
    1349                 : 
    1350              24 :         gistFindCorrectParent(state->r, stack);
    1351              24 :         if (gistinserttuples(state, stack->parent, giststate,
    1352                 :                              &right->downlink, 1,
    1353                 :                              InvalidOffsetNumber,
    1354                 :                              left->buf, right->buf, false, false))
    1355                 :         {
    1356                 :             /*
    1357                 :              * If the parent page was split, the existing downlink might have
    1358                 :              * moved.
    1359                 :              */
    1360 UBC           0 :             stack->downlinkoffnum = InvalidOffsetNumber;
    1361                 :         }
    1362                 :         /* gistinserttuples() released the lock on right->buf. */
    1363                 :     }
    1364                 : 
    1365 CBC       11722 :     right = (GISTPageSplitInfo *) lsecond(splitinfo);
    1366           11722 :     left = (GISTPageSplitInfo *) linitial(splitinfo);
    1367                 : 
    1368                 :     /*
    1369                 :      * Finally insert downlink for the remaining right page and update the
    1370                 :      * downlink for the original page to not contain the tuples that were
    1371                 :      * moved to the new pages.
    1372                 :      */
    1373           11722 :     tuples[0] = left->downlink;
    1374           11722 :     tuples[1] = right->downlink;
    1375           11722 :     gistFindCorrectParent(state->r, stack);
    1376           11722 :     if (gistinserttuples(state, stack->parent, giststate,
    1377                 :                          tuples, 2,
    1378           11722 :                          stack->downlinkoffnum,
    1379                 :                          left->buf, right->buf,
    1380                 :                          true,  /* Unlock parent */
    1381                 :                          unlockbuf  /* Unlock stack->buffer if caller wants
    1382                 :                                      * that */
    1383                 :                          ))
    1384                 :     {
    1385                 :         /*
    1386                 :          * If the parent page was split, the downlink might have moved.
    1387                 :          */
    1388            2364 :         stack->downlinkoffnum = InvalidOffsetNumber;
    1389                 :     }
    1390                 : 
    1391           11722 :     Assert(left->buf == stack->buffer);
    1392                 : 
    1393                 :     /*
    1394                 :      * If we split the page because we had to adjust the downlink on an
    1395                 :      * internal page, while descending the tree for inserting a new tuple,
    1396                 :      * then this might no longer be the correct page for the new tuple. The
    1397                 :      * downlink to this page might not cover the new tuple anymore, it might
    1398                 :      * need to go to the newly-created right sibling instead. Tell the caller
    1399                 :      * to walk back up the stack, to re-check at the parent which page to
    1400                 :      * insert to.
    1401                 :      *
    1402                 :      * Normally, the LSN-NSN interlock during the tree descend would also
    1403                 :      * detect that a concurrent split happened (by ourselves), and cause us to
    1404                 :      * retry at the parent. But that mechanism doesn't work during index
    1405                 :      * build, because we don't do WAL-logging, and don't update LSNs, during
    1406                 :      * index build.
    1407                 :      */
    1408           11722 :     stack->retry_from_parent = true;
    1409           11722 : }
    1410                 : 
    1411                 : /*
    1412                 :  * gistSplit -- split a page in the tree and fill struct
    1413                 :  * used for XLOG and real writes buffers. Function is recursive, ie
    1414                 :  * it will split page until keys will fit in every page.
    1415                 :  */
    1416                 : SplitedPageLayout *
    1417           12884 : gistSplit(Relation r,
    1418                 :           Page page,
    1419                 :           IndexTuple *itup,     /* contains compressed entry */
    1420                 :           int len,
    1421                 :           GISTSTATE *giststate)
    1422                 : {
    1423                 :     IndexTuple *lvectup,
    1424                 :                *rvectup;
    1425                 :     GistSplitVector v;
    1426                 :     int         i;
    1427           12884 :     SplitedPageLayout *res = NULL;
    1428                 : 
    1429                 :     /* this should never recurse very deeply, but better safe than sorry */
    1430           12884 :     check_stack_depth();
    1431                 : 
    1432                 :     /* there's no point in splitting an empty page */
    1433           12884 :     Assert(len > 0);
    1434                 : 
    1435                 :     /*
    1436                 :      * If a single tuple doesn't fit on a page, no amount of splitting will
    1437                 :      * help.
    1438                 :      */
    1439           12884 :     if (len == 1)
    1440 UBC           0 :         ereport(ERROR,
    1441                 :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1442                 :                  errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
    1443                 :                         IndexTupleSize(itup[0]), GiSTPageSize,
    1444                 :                         RelationGetRelationName(r))));
    1445                 : 
    1446 CBC       12884 :     memset(v.spl_lisnull, true,
    1447           12884 :            sizeof(bool) * giststate->nonLeafTupdesc->natts);
    1448           12884 :     memset(v.spl_risnull, true,
    1449           12884 :            sizeof(bool) * giststate->nonLeafTupdesc->natts);
    1450           12884 :     gistSplitByKey(r, page, itup, len, giststate, &v, 0);
    1451                 : 
    1452                 :     /* form left and right vector */
    1453           12884 :     lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
    1454           12884 :     rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
    1455                 : 
    1456          540908 :     for (i = 0; i < v.splitVector.spl_nleft; i++)
    1457          528024 :         lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
    1458                 : 
    1459          605738 :     for (i = 0; i < v.splitVector.spl_nright; i++)
    1460          592854 :         rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
    1461                 : 
    1462                 :     /* finalize splitting (may need another split) */
    1463           12884 :     if (!gistfitpage(rvectup, v.splitVector.spl_nright))
    1464                 :     {
    1465             292 :         res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
    1466                 :     }
    1467                 :     else
    1468                 :     {
    1469           12592 :         ROTATEDIST(res);
    1470           12592 :         res->block.num = v.splitVector.spl_nright;
    1471           12592 :         res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
    1472           12592 :         res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
    1473                 :     }
    1474                 : 
    1475           12884 :     if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
    1476                 :     {
    1477                 :         SplitedPageLayout *resptr,
    1478                 :                    *subres;
    1479                 : 
    1480             157 :         resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
    1481                 : 
    1482                 :         /* install on list's tail */
    1483             395 :         while (resptr->next)
    1484             238 :             resptr = resptr->next;
    1485                 : 
    1486             157 :         resptr->next = res;
    1487             157 :         res = subres;
    1488                 :     }
    1489                 :     else
    1490                 :     {
    1491           12727 :         ROTATEDIST(res);
    1492           12727 :         res->block.num = v.splitVector.spl_nleft;
    1493           12727 :         res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
    1494           12727 :         res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
    1495                 :     }
    1496                 : 
    1497           12884 :     return res;
    1498                 : }
    1499                 : 
    1500                 : /*
    1501                 :  * Create a GISTSTATE and fill it with information about the index
    1502                 :  */
    1503                 : GISTSTATE *
    1504            1788 : initGISTstate(Relation index)
    1505                 : {
    1506                 :     GISTSTATE  *giststate;
    1507                 :     MemoryContext scanCxt;
    1508                 :     MemoryContext oldCxt;
    1509                 :     int         i;
    1510                 : 
    1511                 :     /* safety check to protect fixed-size arrays in GISTSTATE */
    1512            1788 :     if (index->rd_att->natts > INDEX_MAX_KEYS)
    1513 UBC           0 :         elog(ERROR, "numberOfAttributes %d > %d",
    1514                 :              index->rd_att->natts, INDEX_MAX_KEYS);
    1515                 : 
    1516                 :     /* Create the memory context that will hold the GISTSTATE */
    1517 CBC        1788 :     scanCxt = AllocSetContextCreate(CurrentMemoryContext,
    1518                 :                                     "GiST scan context",
    1519                 :                                     ALLOCSET_DEFAULT_SIZES);
    1520            1788 :     oldCxt = MemoryContextSwitchTo(scanCxt);
    1521                 : 
    1522                 :     /* Create and fill in the GISTSTATE */
    1523            1788 :     giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
    1524                 : 
    1525            1788 :     giststate->scanCxt = scanCxt;
    1526            1788 :     giststate->tempCxt = scanCxt;    /* caller must change this if needed */
    1527            1788 :     giststate->leafTupdesc = index->rd_att;
    1528                 : 
    1529                 :     /*
    1530                 :      * The truncated tupdesc for non-leaf index tuples, which doesn't contain
    1531                 :      * the INCLUDE attributes.
    1532                 :      *
    1533                 :      * It is used to form tuples during tuple adjustment and page split.
    1534                 :      * B-tree creates shortened tuple descriptor for every truncated tuple,
    1535                 :      * because it is doing this less often: it does not have to form truncated
    1536                 :      * tuples during page split.  Also, B-tree is not adjusting tuples on
    1537                 :      * internal pages the way GiST does.
    1538                 :      */
    1539            1788 :     giststate->nonLeafTupdesc = CreateTupleDescCopyConstr(index->rd_att);
    1540            1788 :     giststate->nonLeafTupdesc->natts =
    1541            1788 :         IndexRelationGetNumberOfKeyAttributes(index);
    1542                 : 
    1543            3736 :     for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
    1544                 :     {
    1545            1948 :         fmgr_info_copy(&(giststate->consistentFn[i]),
    1546            1948 :                        index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
    1547                 :                        scanCxt);
    1548            1948 :         fmgr_info_copy(&(giststate->unionFn[i]),
    1549            1948 :                        index_getprocinfo(index, i + 1, GIST_UNION_PROC),
    1550                 :                        scanCxt);
    1551                 : 
    1552                 :         /* opclasses are not required to provide a Compress method */
    1553            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
    1554            1448 :             fmgr_info_copy(&(giststate->compressFn[i]),
    1555            1448 :                            index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
    1556                 :                            scanCxt);
    1557                 :         else
    1558             500 :             giststate->compressFn[i].fn_oid = InvalidOid;
    1559                 : 
    1560                 :         /* opclasses are not required to provide a Decompress method */
    1561            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
    1562             745 :             fmgr_info_copy(&(giststate->decompressFn[i]),
    1563             745 :                            index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
    1564                 :                            scanCxt);
    1565                 :         else
    1566            1203 :             giststate->decompressFn[i].fn_oid = InvalidOid;
    1567                 : 
    1568            1948 :         fmgr_info_copy(&(giststate->penaltyFn[i]),
    1569            1948 :                        index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
    1570                 :                        scanCxt);
    1571            1948 :         fmgr_info_copy(&(giststate->picksplitFn[i]),
    1572            1948 :                        index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
    1573                 :                        scanCxt);
    1574            1948 :         fmgr_info_copy(&(giststate->equalFn[i]),
    1575            1948 :                        index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
    1576                 :                        scanCxt);
    1577                 : 
    1578                 :         /* opclasses are not required to provide a Distance method */
    1579            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
    1580             898 :             fmgr_info_copy(&(giststate->distanceFn[i]),
    1581             898 :                            index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
    1582                 :                            scanCxt);
    1583                 :         else
    1584            1050 :             giststate->distanceFn[i].fn_oid = InvalidOid;
    1585                 : 
    1586                 :         /* opclasses are not required to provide a Fetch method */
    1587            1948 :         if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
    1588             631 :             fmgr_info_copy(&(giststate->fetchFn[i]),
    1589             631 :                            index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
    1590                 :                            scanCxt);
    1591                 :         else
    1592            1317 :             giststate->fetchFn[i].fn_oid = InvalidOid;
    1593                 : 
    1594                 :         /*
    1595                 :          * If the index column has a specified collation, we should honor that
    1596                 :          * while doing comparisons.  However, we may have a collatable storage
    1597                 :          * type for a noncollatable indexed data type.  If there's no index
    1598                 :          * collation then specify default collation in case the support
    1599                 :          * functions need collation.  This is harmless if the support
    1600                 :          * functions don't care about collation, so we just do it
    1601                 :          * unconditionally.  (We could alternatively call get_typcollation,
    1602                 :          * but that seems like expensive overkill --- there aren't going to be
    1603                 :          * any cases where a GiST storage type has a nondefault collation.)
    1604                 :          */
    1605            1948 :         if (OidIsValid(index->rd_indcollation[i]))
    1606              96 :             giststate->supportCollation[i] = index->rd_indcollation[i];
    1607                 :         else
    1608            1852 :             giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
    1609                 :     }
    1610                 : 
    1611                 :     /* No opclass information for INCLUDE attributes */
    1612            2037 :     for (; i < index->rd_att->natts; i++)
    1613                 :     {
    1614             249 :         giststate->consistentFn[i].fn_oid = InvalidOid;
    1615             249 :         giststate->unionFn[i].fn_oid = InvalidOid;
    1616             249 :         giststate->compressFn[i].fn_oid = InvalidOid;
    1617             249 :         giststate->decompressFn[i].fn_oid = InvalidOid;
    1618             249 :         giststate->penaltyFn[i].fn_oid = InvalidOid;
    1619             249 :         giststate->picksplitFn[i].fn_oid = InvalidOid;
    1620             249 :         giststate->equalFn[i].fn_oid = InvalidOid;
    1621             249 :         giststate->distanceFn[i].fn_oid = InvalidOid;
    1622             249 :         giststate->fetchFn[i].fn_oid = InvalidOid;
    1623             249 :         giststate->supportCollation[i] = InvalidOid;
    1624                 :     }
    1625                 : 
    1626            1788 :     MemoryContextSwitchTo(oldCxt);
    1627                 : 
    1628            1788 :     return giststate;
    1629                 : }
    1630                 : 
    1631                 : void
    1632            1523 : freeGISTstate(GISTSTATE *giststate)
    1633                 : {
    1634                 :     /* It's sufficient to delete the scanCxt */
    1635            1523 :     MemoryContextDelete(giststate->scanCxt);
    1636            1523 : }
    1637                 : 
    1638                 : /*
    1639                 :  * gistprunepage() -- try to remove LP_DEAD items from the given page.
    1640                 :  * Function assumes that buffer is exclusively locked.
    1641                 :  */
    1642                 : static void
    1643 UBC           0 : gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
    1644                 : {
    1645                 :     OffsetNumber deletable[MaxIndexTuplesPerPage];
    1646               0 :     int         ndeletable = 0;
    1647                 :     OffsetNumber offnum,
    1648                 :                 maxoff;
    1649                 : 
    1650               0 :     Assert(GistPageIsLeaf(page));
    1651                 : 
    1652                 :     /*
    1653                 :      * Scan over all items to see which ones need to be deleted according to
    1654                 :      * LP_DEAD flags.
    1655                 :      */
    1656               0 :     maxoff = PageGetMaxOffsetNumber(page);
    1657               0 :     for (offnum = FirstOffsetNumber;
    1658               0 :          offnum <= maxoff;
    1659               0 :          offnum = OffsetNumberNext(offnum))
    1660                 :     {
    1661               0 :         ItemId      itemId = PageGetItemId(page, offnum);
    1662                 : 
    1663               0 :         if (ItemIdIsDead(itemId))
    1664               0 :             deletable[ndeletable++] = offnum;
    1665                 :     }
    1666                 : 
    1667               0 :     if (ndeletable > 0)
    1668                 :     {
    1669 UNC           0 :         TransactionId snapshotConflictHorizon = InvalidTransactionId;
    1670                 : 
    1671 UBC           0 :         if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
    1672                 :             snapshotConflictHorizon =
    1673               0 :                 index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
    1674                 :                                                      deletable, ndeletable);
    1675                 : 
    1676               0 :         START_CRIT_SECTION();
    1677                 : 
    1678               0 :         PageIndexMultiDelete(page, deletable, ndeletable);
    1679                 : 
    1680                 :         /*
    1681                 :          * Mark the page as not containing any LP_DEAD items.  This is not
    1682                 :          * certainly true (there might be some that have recently been marked,
    1683                 :          * but weren't included in our target-item list), but it will almost
    1684                 :          * always be true and it doesn't seem worth an additional page scan to
    1685                 :          * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
    1686                 :          */
    1687               0 :         GistClearPageHasGarbage(page);
    1688                 : 
    1689               0 :         MarkBufferDirty(buffer);
    1690                 : 
    1691                 :         /* XLOG stuff */
    1692               0 :         if (RelationNeedsWAL(rel))
    1693               0 :         {
    1694                 :             XLogRecPtr  recptr;
    1695                 : 
    1696               0 :             recptr = gistXLogDelete(buffer,
    1697                 :                                     deletable, ndeletable,
    1698                 :                                     snapshotConflictHorizon,
    1699                 :                                     heapRel);
    1700                 : 
    1701 UIC           0 :             PageSetLSN(page, recptr);
    1702 EUB             :         }
    1703                 :         else
    1704 UIC           0 :             PageSetLSN(page, gistGetFakeLSN(rel));
    1705 EUB             : 
    1706 UIC           0 :         END_CRIT_SECTION();
    1707 EUB             :     }
    1708                 : 
    1709                 :     /*
    1710                 :      * Note: if we didn't find any LP_DEAD items, then the page's
    1711                 :      * F_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
    1712                 :      * separate write to clear it, however.  We will clear it when we split
    1713                 :      * the page.
    1714                 :      */
    1715 UIC           0 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a