LCOV - differential code coverage report
Current view: top level - src/backend/access/gist - gistbuild.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 96.8 % 435 421 2 11 1 4 262 8 147 9 270 2
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 20 20 20 20
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * gistbuild.c
       4                 :  *    build algorithm for GiST indexes implementation.
       5                 :  *
       6                 :  * There are two different strategies:
       7                 :  *
       8                 :  * 1. Sort all input tuples, pack them into GiST leaf pages in the sorted
       9                 :  *    order, and create downlinks and internal pages as we go. This builds
      10                 :  *    the index from the bottom up, similar to how B-tree index build
      11                 :  *    works.
      12                 :  *
      13                 :  * 2. Start with an empty index, and insert all tuples one by one.
      14                 :  *
      15                 :  * The sorted method is used if the operator classes for all columns have
      16                 :  * a 'sortsupport' defined. Otherwise, we resort to the second strategy.
      17                 :  *
      18                 :  * The second strategy can optionally use buffers at different levels of
      19                 :  * the tree to reduce I/O, see "Buffering build algorithm" in the README
      20                 :  * for a more detailed explanation. It initially calls insert over and
      21                 :  * over, but switches to the buffered algorithm after a certain number of
      22                 :  * tuples (unless buffering mode is disabled).
      23                 :  *
      24                 :  *
      25                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      26                 :  * Portions Copyright (c) 1994, Regents of the University of California
      27                 :  *
      28                 :  * IDENTIFICATION
      29                 :  *    src/backend/access/gist/gistbuild.c
      30                 :  *
      31                 :  *-------------------------------------------------------------------------
      32                 :  */
      33                 : #include "postgres.h"
      34                 : 
      35                 : #include <math.h>
      36                 : 
      37                 : #include "access/genam.h"
      38                 : #include "access/gist_private.h"
      39                 : #include "access/gistxlog.h"
      40                 : #include "access/tableam.h"
      41                 : #include "access/xloginsert.h"
      42                 : #include "catalog/index.h"
      43                 : #include "miscadmin.h"
      44                 : #include "optimizer/optimizer.h"
      45                 : #include "storage/bufmgr.h"
      46                 : #include "storage/smgr.h"
      47                 : #include "utils/memutils.h"
      48                 : #include "utils/rel.h"
      49                 : #include "utils/tuplesort.h"
      50                 : 
      51                 : /* Step of index tuples for check whether to switch to buffering build mode */
      52                 : #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
      53                 : 
      54                 : /*
      55                 :  * Number of tuples to process in the slow way before switching to buffering
      56                 :  * mode, when buffering is explicitly turned on. Also, the number of tuples
      57                 :  * to process between readjusting the buffer size parameter, while in
      58                 :  * buffering mode.
      59                 :  */
      60                 : #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
      61                 : 
      62                 : /*
      63                 :  * Strategy used to build the index. It can change between the
      64                 :  * GIST_BUFFERING_* modes on the fly, but if the Sorted method is used,
      65                 :  * that needs to be decided up-front and cannot be changed afterwards.
      66                 :  */
      67                 : typedef enum
      68                 : {
      69                 :     GIST_SORTED_BUILD,          /* bottom-up build by sorting */
      70                 :     GIST_BUFFERING_DISABLED,    /* in regular build mode and aren't going to
      71                 :                                  * switch */
      72                 :     GIST_BUFFERING_AUTO,        /* in regular build mode, but will switch to
      73                 :                                  * buffering build mode if the index grows too
      74                 :                                  * big */
      75                 :     GIST_BUFFERING_STATS,       /* gathering statistics of index tuple size
      76                 :                                  * before switching to the buffering build
      77                 :                                  * mode */
      78                 :     GIST_BUFFERING_ACTIVE       /* in buffering build mode */
      79                 : } GistBuildMode;
      80                 : 
      81                 : /* Working state for gistbuild and its callback */
      82                 : typedef struct
      83                 : {
      84                 :     Relation    indexrel;
      85                 :     Relation    heaprel;
      86                 :     GISTSTATE  *giststate;
      87                 : 
      88                 :     Size        freespace;      /* amount of free space to leave on pages */
      89                 : 
      90                 :     GistBuildMode buildMode;
      91                 : 
      92                 :     int64       indtuples;      /* number of tuples indexed */
      93                 : 
      94                 :     /*
      95                 :      * Extra data structures used during a buffering build. 'gfbb' contains
      96                 :      * information related to managing the build buffers. 'parentMap' is a
      97                 :      * lookup table of the parent of each internal page.
      98                 :      */
      99                 :     int64       indtuplesSize;  /* total size of all indexed tuples */
     100                 :     GISTBuildBuffers *gfbb;
     101                 :     HTAB       *parentMap;
     102                 : 
     103                 :     /*
     104                 :      * Extra data structures used during a sorting build.
     105                 :      */
     106                 :     Tuplesortstate *sortstate;  /* state data for tuplesort.c */
     107                 : 
     108                 :     BlockNumber pages_allocated;
     109                 :     BlockNumber pages_written;
     110                 : 
     111                 :     int         ready_num_pages;
     112                 :     BlockNumber ready_blknos[XLR_MAX_BLOCK_ID];
     113                 :     Page        ready_pages[XLR_MAX_BLOCK_ID];
     114                 : } GISTBuildState;
     115                 : 
     116                 : #define GIST_SORTED_BUILD_PAGE_NUM 4
     117                 : 
     118                 : /*
     119                 :  * In sorted build, we use a stack of these structs, one for each level,
     120                 :  * to hold an in-memory buffer of last pages at the level.
     121                 :  *
     122                 :  * Sorting GiST build requires good linearization of the sort opclass. This is
     123                 :  * not always the case in multidimensional data. To tackle the anomalies, we
     124                 :  * buffer index tuples and apply picksplit that can be multidimension-aware.
     125                 :  */
     126                 : typedef struct GistSortedBuildLevelState
     127                 : {
     128                 :     int         current_page;
     129                 :     BlockNumber last_blkno;
     130                 :     struct GistSortedBuildLevelState *parent;   /* Upper level, if any */
     131                 :     Page        pages[GIST_SORTED_BUILD_PAGE_NUM];
     132                 : } GistSortedBuildLevelState;
     133                 : 
     134                 : /* prototypes for private functions */
     135                 : 
     136                 : static void gistSortedBuildCallback(Relation index, ItemPointer tid,
     137                 :                                     Datum *values, bool *isnull,
     138                 :                                     bool tupleIsAlive, void *state);
     139                 : static void gist_indexsortbuild(GISTBuildState *state);
     140                 : static void gist_indexsortbuild_levelstate_add(GISTBuildState *state,
     141                 :                                                GistSortedBuildLevelState *levelstate,
     142                 :                                                IndexTuple itup);
     143                 : static void gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
     144                 :                                                  GistSortedBuildLevelState *levelstate);
     145                 : static void gist_indexsortbuild_flush_ready_pages(GISTBuildState *state);
     146                 : 
     147                 : static void gistInitBuffering(GISTBuildState *buildstate);
     148                 : static int  calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
     149                 : static void gistBuildCallback(Relation index,
     150                 :                               ItemPointer tid,
     151                 :                               Datum *values,
     152                 :                               bool *isnull,
     153                 :                               bool tupleIsAlive,
     154                 :                               void *state);
     155                 : static void gistBufferingBuildInsert(GISTBuildState *buildstate,
     156                 :                                      IndexTuple itup);
     157                 : static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
     158                 :                             BlockNumber startblkno, int startlevel);
     159                 : static BlockNumber gistbufferinginserttuples(GISTBuildState *buildstate,
     160                 :                                              Buffer buffer, int level,
     161                 :                                              IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
     162                 :                                              BlockNumber parentblk, OffsetNumber downlinkoffnum);
     163                 : static Buffer gistBufferingFindCorrectParent(GISTBuildState *buildstate,
     164                 :                                              BlockNumber childblkno, int level,
     165                 :                                              BlockNumber *parentblkno,
     166                 :                                              OffsetNumber *downlinkoffnum);
     167                 : static void gistProcessEmptyingQueue(GISTBuildState *buildstate);
     168                 : static void gistEmptyAllBuffers(GISTBuildState *buildstate);
     169                 : static int  gistGetMaxLevel(Relation index);
     170                 : 
     171                 : static void gistInitParentMap(GISTBuildState *buildstate);
     172                 : static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
     173                 :                                BlockNumber parent);
     174                 : static void gistMemorizeAllDownlinks(GISTBuildState *buildstate,
     175                 :                                      Buffer parentbuf);
     176                 : static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
     177                 : 
     178                 : 
     179                 : /*
     180                 :  * Main entry point to GiST index build.
     181                 :  */
     182                 : IndexBuildResult *
     183 GIC         293 : gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     184 ECB             : {
     185                 :     IndexBuildResult *result;
     186                 :     double      reltuples;
     187                 :     GISTBuildState buildstate;
     188 GIC         293 :     MemoryContext oldcxt = CurrentMemoryContext;
     189 ECB             :     int         fillfactor;
     190                 :     Oid         SortSupportFnOids[INDEX_MAX_KEYS];
     191 GIC         293 :     GiSTOptions *options = (GiSTOptions *) index->rd_options;
     192 ECB             : 
     193                 :     /*
     194                 :      * We expect to be called exactly once for any index relation. If that's
     195                 :      * not the case, big trouble's what we have.
     196                 :      */
     197 GIC         293 :     if (RelationGetNumberOfBlocks(index) != 0)
     198 LBC           0 :         elog(ERROR, "index \"%s\" already contains data",
     199 EUB             :              RelationGetRelationName(index));
     200                 : 
     201 GIC         293 :     buildstate.indexrel = index;
     202 CBC         293 :     buildstate.heaprel = heap;
     203             293 :     buildstate.sortstate = NULL;
     204             293 :     buildstate.giststate = initGISTstate(index);
     205 ECB             : 
     206                 :     /*
     207                 :      * Create a temporary memory context that is reset once for each tuple
     208                 :      * processed.  (Note: we don't bother to make this a child of the
     209                 :      * giststate's scanCxt, so we have to delete it separately at the end.)
     210                 :      */
     211 GIC         293 :     buildstate.giststate->tempCxt = createTempGistContext();
     212 ECB             : 
     213                 :     /*
     214                 :      * Choose build strategy.  First check whether the user specified to use
     215                 :      * buffering mode.  (The use-case for that in the field is somewhat
     216                 :      * questionable perhaps, but it's important for testing purposes.)
     217                 :      */
     218 GIC         293 :     if (options)
     219 ECB             :     {
     220 GIC          16 :         if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
     221 CBC           6 :             buildstate.buildMode = GIST_BUFFERING_STATS;
     222              10 :         else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
     223               3 :             buildstate.buildMode = GIST_BUFFERING_DISABLED;
     224 ECB             :         else                    /* must be "auto" */
     225 GIC           7 :             buildstate.buildMode = GIST_BUFFERING_AUTO;
     226 ECB             :     }
     227                 :     else
     228                 :     {
     229 GIC         277 :         buildstate.buildMode = GIST_BUFFERING_AUTO;
     230 ECB             :     }
     231                 : 
     232                 :     /*
     233                 :      * Unless buffering mode was forced, see if we can use sorting instead.
     234                 :      */
     235 GIC         293 :     if (buildstate.buildMode != GIST_BUFFERING_STATS)
     236 ECB             :     {
     237 GIC         287 :         bool        hasallsortsupports = true;
     238 CBC         287 :         int         keyscount = IndexRelationGetNumberOfKeyAttributes(index);
     239 ECB             : 
     240 GIC         359 :         for (int i = 0; i < keyscount; i++)
     241 ECB             :         {
     242 GIC         290 :             SortSupportFnOids[i] = index_getprocid(index, i + 1,
     243 ECB             :                                                    GIST_SORTSUPPORT_PROC);
     244 GIC         290 :             if (!OidIsValid(SortSupportFnOids[i]))
     245 ECB             :             {
     246 GIC         218 :                 hasallsortsupports = false;
     247 CBC         218 :                 break;
     248 ECB             :             }
     249                 :         }
     250 GIC         287 :         if (hasallsortsupports)
     251 CBC          69 :             buildstate.buildMode = GIST_SORTED_BUILD;
     252 ECB             :     }
     253                 : 
     254                 :     /*
     255                 :      * Calculate target amount of free space to leave on pages.
     256                 :      */
     257 GIC         293 :     fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR;
     258 CBC         293 :     buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
     259 ECB             : 
     260                 :     /*
     261                 :      * Build the index using the chosen strategy.
     262                 :      */
     263 GIC         293 :     buildstate.indtuples = 0;
     264 CBC         293 :     buildstate.indtuplesSize = 0;
     265 ECB             : 
     266 GIC         293 :     if (buildstate.buildMode == GIST_SORTED_BUILD)
     267 ECB             :     {
     268                 :         /*
     269                 :          * Sort all data, build the index from bottom up.
     270                 :          */
     271 GIC          69 :         buildstate.sortstate = tuplesort_begin_index_gist(heap,
     272 ECB             :                                                           index,
     273                 :                                                           maintenance_work_mem,
     274                 :                                                           NULL,
     275                 :                                                           TUPLESORT_NONE);
     276                 : 
     277                 :         /* Scan the table, adding all tuples to the tuplesort */
     278 GIC          69 :         reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     279 ECB             :                                            gistSortedBuildCallback,
     280                 :                                            (void *) &buildstate, NULL);
     281                 : 
     282                 :         /*
     283                 :          * Perform the sort and build index pages.
     284                 :          */
     285 GIC          69 :         tuplesort_performsort(buildstate.sortstate);
     286 ECB             : 
     287 GIC          69 :         gist_indexsortbuild(&buildstate);
     288 ECB             : 
     289 GIC          69 :         tuplesort_end(buildstate.sortstate);
     290 ECB             :     }
     291                 :     else
     292                 :     {
     293                 :         /*
     294                 :          * Initialize an empty index and insert all tuples, possibly using
     295                 :          * buffers on intermediate levels.
     296                 :          */
     297                 :         Buffer      buffer;
     298                 :         Page        page;
     299                 : 
     300                 :         /* initialize the root page */
     301 GNC         224 :         buffer = gistNewBuffer(index, heap);
     302 CBC         224 :         Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
     303             224 :         page = BufferGetPage(buffer);
     304 ECB             : 
     305 GIC         224 :         START_CRIT_SECTION();
     306 ECB             : 
     307 GIC         224 :         GISTInitBuffer(buffer, F_LEAF);
     308 ECB             : 
     309 GIC         224 :         MarkBufferDirty(buffer);
     310 CBC         224 :         PageSetLSN(page, GistBuildLSN);
     311 ECB             : 
     312 GIC         224 :         UnlockReleaseBuffer(buffer);
     313 ECB             : 
     314 GIC         224 :         END_CRIT_SECTION();
     315 ECB             : 
     316                 :         /* Scan the table, inserting all the tuples to the index. */
     317 GIC         224 :         reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
     318 ECB             :                                            gistBuildCallback,
     319                 :                                            (void *) &buildstate, NULL);
     320                 : 
     321                 :         /*
     322                 :          * If buffering was used, flush out all the tuples that are still in
     323                 :          * the buffers.
     324                 :          */
     325 GIC         224 :         if (buildstate.buildMode == GIST_BUFFERING_ACTIVE)
     326 ECB             :         {
     327 GIC           3 :             elog(DEBUG1, "all tuples processed, emptying buffers");
     328 CBC           3 :             gistEmptyAllBuffers(&buildstate);
     329               3 :             gistFreeBuildBuffers(buildstate.gfbb);
     330 ECB             :         }
     331                 : 
     332                 :         /*
     333                 :          * We didn't write WAL records as we built the index, so if
     334                 :          * WAL-logging is required, write all pages to the WAL now.
     335                 :          */
     336 GIC         224 :         if (RelationNeedsWAL(index))
     337 ECB             :         {
     338 GIC         132 :             log_newpage_range(index, MAIN_FORKNUM,
     339 ECB             :                               0, RelationGetNumberOfBlocks(index),
     340                 :                               true);
     341                 :         }
     342                 :     }
     343                 : 
     344                 :     /* okay, all heap tuples are indexed */
     345 GIC         293 :     MemoryContextSwitchTo(oldcxt);
     346 CBC         293 :     MemoryContextDelete(buildstate.giststate->tempCxt);
     347 ECB             : 
     348 GIC         293 :     freeGISTstate(buildstate.giststate);
     349 ECB             : 
     350                 :     /*
     351                 :      * Return statistics
     352                 :      */
     353 GIC         293 :     result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
     354 ECB             : 
     355 GIC         293 :     result->heap_tuples = reltuples;
     356 CBC         293 :     result->index_tuples = (double) buildstate.indtuples;
     357 ECB             : 
     358 GIC         293 :     return result;
     359 ECB             : }
     360                 : 
     361                 : /*-------------------------------------------------------------------------
     362                 :  * Routines for sorted build
     363                 :  *-------------------------------------------------------------------------
     364                 :  */
     365                 : 
     366                 : /*
     367                 :  * Per-tuple callback for table_index_build_scan.
     368                 :  */
     369                 : static void
     370 GIC       97072 : gistSortedBuildCallback(Relation index,
     371 ECB             :                         ItemPointer tid,
     372                 :                         Datum *values,
     373                 :                         bool *isnull,
     374                 :                         bool tupleIsAlive,
     375                 :                         void *state)
     376                 : {
     377 GIC       97072 :     GISTBuildState *buildstate = (GISTBuildState *) state;
     378 ECB             :     MemoryContext oldCtx;
     379                 :     Datum       compressed_values[INDEX_MAX_KEYS];
     380                 : 
     381 GIC       97072 :     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
     382 ECB             : 
     383                 :     /* Form an index tuple and point it at the heap tuple */
     384 GIC       97072 :     gistCompressValues(buildstate->giststate, index,
     385 ECB             :                        values, isnull,
     386                 :                        true, compressed_values);
     387                 : 
     388 GIC       97072 :     tuplesort_putindextuplevalues(buildstate->sortstate,
     389 ECB             :                                   buildstate->indexrel,
     390                 :                                   tid,
     391                 :                                   compressed_values, isnull);
     392                 : 
     393 GIC       97072 :     MemoryContextSwitchTo(oldCtx);
     394 CBC       97072 :     MemoryContextReset(buildstate->giststate->tempCxt);
     395 ECB             : 
     396                 :     /* Update tuple count. */
     397 GIC       97072 :     buildstate->indtuples += 1;
     398 CBC       97072 : }
     399 ECB             : 
     400                 : /*
     401                 :  * Build GiST index from bottom up from pre-sorted tuples.
     402                 :  */
     403                 : static void
     404 GIC          69 : gist_indexsortbuild(GISTBuildState *state)
     405 ECB             : {
     406                 :     IndexTuple  itup;
     407                 :     GistSortedBuildLevelState *levelstate;
     408                 :     Page        page;
     409                 : 
     410 GIC          69 :     state->pages_allocated = 0;
     411 CBC          69 :     state->pages_written = 0;
     412              69 :     state->ready_num_pages = 0;
     413 ECB             : 
     414                 :     /*
     415                 :      * Write an empty page as a placeholder for the root page. It will be
     416                 :      * replaced with the real root page at the end.
     417                 :      */
     418 GNC          69 :     page = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, MCXT_ALLOC_ZERO);
     419 CBC          69 :     smgrextend(RelationGetSmgr(state->indexrel), MAIN_FORKNUM, GIST_ROOT_BLKNO,
     420 ECB             :                page, true);
     421 GIC          69 :     state->pages_allocated++;
     422 CBC          69 :     state->pages_written++;
     423 ECB             : 
     424                 :     /* Allocate a temporary buffer for the first leaf page batch. */
     425 GIC          69 :     levelstate = palloc0(sizeof(GistSortedBuildLevelState));
     426 CBC          69 :     levelstate->pages[0] = page;
     427              69 :     levelstate->parent = NULL;
     428              69 :     gistinitpage(page, F_LEAF);
     429 ECB             : 
     430                 :     /*
     431                 :      * Fill index pages with tuples in the sorted order.
     432                 :      */
     433 GIC       97141 :     while ((itup = tuplesort_getindextuple(state->sortstate, true)) != NULL)
     434 ECB             :     {
     435 GIC       97072 :         gist_indexsortbuild_levelstate_add(state, levelstate, itup);
     436 CBC       97072 :         MemoryContextReset(state->giststate->tempCxt);
     437 ECB             :     }
     438                 : 
     439                 :     /*
     440                 :      * Write out the partially full non-root pages.
     441                 :      *
     442                 :      * Keep in mind that flush can build a new root. If number of pages is > 1
     443                 :      * then new root is required.
     444                 :      */
     445 GIC          82 :     while (levelstate->parent != NULL || levelstate->current_page != 0)
     446 ECB             :     {
     447                 :         GistSortedBuildLevelState *parent;
     448                 : 
     449 GIC          13 :         gist_indexsortbuild_levelstate_flush(state, levelstate);
     450 CBC          13 :         parent = levelstate->parent;
     451              65 :         for (int i = 0; i < GIST_SORTED_BUILD_PAGE_NUM; i++)
     452              52 :             if (levelstate->pages[i])
     453              52 :                 pfree(levelstate->pages[i]);
     454              13 :         pfree(levelstate);
     455              13 :         levelstate = parent;
     456 ECB             :     }
     457                 : 
     458 GIC          69 :     gist_indexsortbuild_flush_ready_pages(state);
     459 ECB             : 
     460                 :     /* Write out the root */
     461 GIC          69 :     PageSetLSN(levelstate->pages[0], GistBuildLSN);
     462 CBC          69 :     PageSetChecksumInplace(levelstate->pages[0], GIST_ROOT_BLKNO);
     463              69 :     smgrwrite(RelationGetSmgr(state->indexrel), MAIN_FORKNUM, GIST_ROOT_BLKNO,
     464              69 :               levelstate->pages[0], true);
     465              69 :     if (RelationNeedsWAL(state->indexrel))
     466 GNC          53 :         log_newpage(&state->indexrel->rd_locator, MAIN_FORKNUM, GIST_ROOT_BLKNO,
     467 ECB             :                     levelstate->pages[0], true);
     468                 : 
     469 GIC          69 :     pfree(levelstate->pages[0]);
     470 CBC          69 :     pfree(levelstate);
     471 ECB             : 
     472                 :     /*
     473                 :      * When we WAL-logged index pages, we must nonetheless fsync index files.
     474                 :      * Since we're building outside shared buffers, a CHECKPOINT occurring
     475                 :      * during the build has no way to flush the previously written data to
     476                 :      * disk (indeed it won't know the index even exists).  A crash later on
     477                 :      * would replay WAL from the checkpoint, therefore it wouldn't replay our
     478                 :      * earlier WAL entries. If we do not fsync those pages here, they might
     479                 :      * still not be on disk when the crash occurs.
     480                 :      */
     481 GIC          69 :     if (RelationNeedsWAL(state->indexrel))
     482 CBC          53 :         smgrimmedsync(RelationGetSmgr(state->indexrel), MAIN_FORKNUM);
     483              69 : }
     484 ECB             : 
     485                 : /*
     486                 :  * Add tuple to a page. If the pages are full, write them out and re-initialize
     487                 :  * a new page first.
     488                 :  */
     489                 : static void
     490 GIC       97771 : gist_indexsortbuild_levelstate_add(GISTBuildState *state,
     491 ECB             :                                    GistSortedBuildLevelState *levelstate,
     492                 :                                    IndexTuple itup)
     493                 : {
     494                 :     Size        sizeNeeded;
     495                 : 
     496                 :     /* Check if tuple can be added to the current page */
     497 GIC       97771 :     sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData); /* fillfactor ignored */
     498 CBC       97771 :     if (PageGetFreeSpace(levelstate->pages[levelstate->current_page]) < sizeNeeded)
     499 ECB             :     {
     500                 :         Page        newPage;
     501 GIC         521 :         Page        old_page = levelstate->pages[levelstate->current_page];
     502 CBC         521 :         uint16      old_page_flags = GistPageGetOpaque(old_page)->flags;
     503 ECB             : 
     504 GIC         521 :         if (levelstate->current_page + 1 == GIST_SORTED_BUILD_PAGE_NUM)
     505 ECB             :         {
     506 GIC         127 :             gist_indexsortbuild_levelstate_flush(state, levelstate);
     507 ECB             :         }
     508                 :         else
     509 GIC         394 :             levelstate->current_page++;
     510 ECB             : 
     511 GIC         521 :         if (levelstate->pages[levelstate->current_page] == NULL)
     512 GNC          39 :             levelstate->pages[levelstate->current_page] =
     513              39 :                 palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
     514 ECB             : 
     515 CBC         521 :         newPage = levelstate->pages[levelstate->current_page];
     516 GIC         521 :         gistinitpage(newPage, old_page_flags);
     517 ECB             :     }
     518                 : 
     519 GIC       97771 :     gistfillbuffer(levelstate->pages[levelstate->current_page], &itup, 1, InvalidOffsetNumber);
     520           97771 : }
     521 ECB             : 
     522                 : static void
     523 GIC         140 : gist_indexsortbuild_levelstate_flush(GISTBuildState *state,
     524                 :                                      GistSortedBuildLevelState *levelstate)
     525 ECB             : {
     526                 :     GistSortedBuildLevelState *parent;
     527                 :     BlockNumber blkno;
     528                 :     MemoryContext oldCtx;
     529                 :     IndexTuple  union_tuple;
     530                 :     SplitedPageLayout *dist;
     531                 :     IndexTuple *itvec;
     532                 :     int         vect_len;
     533 GIC         140 :     bool        isleaf = GistPageIsLeaf(levelstate->pages[0]);
     534                 : 
     535 CBC         140 :     CHECK_FOR_INTERRUPTS();
     536                 : 
     537             140 :     oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
     538                 : 
     539 ECB             :     /* Get index tuples from first page */
     540 GIC         140 :     itvec = gistextractpage(levelstate->pages[0], &vect_len);
     541             140 :     if (levelstate->current_page > 0)
     542 ECB             :     {
     543                 :         /* Append tuples from each page */
     544 GIC         531 :         for (int i = 1; i < levelstate->current_page + 1; i++)
     545                 :         {
     546 ECB             :             int         len_local;
     547 GIC         394 :             IndexTuple *itvec_local = gistextractpage(levelstate->pages[i], &len_local);
     548                 : 
     549 CBC         394 :             itvec = gistjoinvector(itvec, &vect_len, itvec_local, len_local);
     550 GIC         394 :             pfree(itvec_local);
     551 ECB             :         }
     552                 : 
     553                 :         /* Apply picksplit to list of all collected tuples */
     554 GIC         137 :         dist = gistSplit(state->indexrel, levelstate->pages[0], itvec, vect_len, state->giststate);
     555                 :     }
     556 ECB             :     else
     557                 :     {
     558                 :         /* Create splitted layout from single page */
     559 GIC           3 :         dist = (SplitedPageLayout *) palloc0(sizeof(SplitedPageLayout));
     560               3 :         union_tuple = gistunion(state->indexrel, itvec, vect_len,
     561 ECB             :                                 state->giststate);
     562 CBC           3 :         dist->itup = union_tuple;
     563 GIC           3 :         dist->list = gistfillitupvec(itvec, vect_len, &(dist->lenlist));
     564 CBC           3 :         dist->block.num = vect_len;
     565 ECB             :     }
     566                 : 
     567 GIC         140 :     MemoryContextSwitchTo(oldCtx);
     568                 : 
     569 ECB             :     /* Reset page counter */
     570 GIC         140 :     levelstate->current_page = 0;
     571                 : 
     572 ECB             :     /* Create pages for all partitions in split result */
     573 GIC         839 :     for (; dist != NULL; dist = dist->next)
     574                 :     {
     575 ECB             :         char       *data;
     576                 :         Page        target;
     577                 : 
     578                 :         /* check once per page */
     579 GIC         699 :         CHECK_FOR_INTERRUPTS();
     580                 : 
     581 ECB             :         /* Create page and copy data */
     582 GIC         699 :         data = (char *) (dist->list);
     583 GNC         699 :         target = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, MCXT_ALLOC_ZERO);
     584 CBC         699 :         gistinitpage(target, isleaf ? F_LEAF : 0);
     585           97705 :         for (int i = 0; i < dist->block.num; i++)
     586 ECB             :         {
     587 CBC       97006 :             IndexTuple  thistup = (IndexTuple) data;
     588                 : 
     589           97006 :             if (PageAddItem(target, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
     590 UIC           0 :                 elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(state->indexrel));
     591 ECB             : 
     592 GBC       97006 :             data += IndexTupleSize(thistup);
     593                 :         }
     594 CBC         699 :         union_tuple = dist->itup;
     595                 : 
     596             699 :         if (state->ready_num_pages == XLR_MAX_BLOCK_ID)
     597 GIC          18 :             gist_indexsortbuild_flush_ready_pages(state);
     598 ECB             : 
     599                 :         /*
     600                 :          * The page is now complete. Assign a block number to it, and add it
     601                 :          * to the list of finished pages. (We don't write it out immediately,
     602                 :          * because we want to WAL-log the pages in batches.)
     603                 :          */
     604 GIC         699 :         blkno = state->pages_allocated++;
     605             699 :         state->ready_blknos[state->ready_num_pages] = blkno;
     606 CBC         699 :         state->ready_pages[state->ready_num_pages] = target;
     607             699 :         state->ready_num_pages++;
     608             699 :         ItemPointerSetBlockNumber(&(union_tuple->t_tid), blkno);
     609 ECB             : 
     610                 :         /*
     611                 :          * Set the right link to point to the previous page. This is just for
     612                 :          * debugging purposes: GiST only follows the right link if a page is
     613                 :          * split concurrently to a scan, and that cannot happen during index
     614                 :          * build.
     615                 :          *
     616                 :          * It's a bit counterintuitive that we set the right link on the new
     617                 :          * page to point to the previous page, not the other way around. But
     618                 :          * GiST pages are not ordered like B-tree pages are, so as long as the
     619                 :          * right-links form a chain through all the pages at the same level,
     620                 :          * the order doesn't matter.
     621                 :          */
     622 GIC         699 :         if (levelstate->last_blkno)
     623             686 :             GistPageGetOpaque(target)->rightlink = levelstate->last_blkno;
     624 CBC         699 :         levelstate->last_blkno = blkno;
     625 ECB             : 
     626                 :         /*
     627                 :          * Insert the downlink to the parent page. If this was the root,
     628                 :          * create a new page as the parent, which becomes the new root.
     629                 :          */
     630 GIC         699 :         parent = levelstate->parent;
     631             699 :         if (parent == NULL)
     632 ECB             :         {
     633 CBC          13 :             parent = palloc0(sizeof(GistSortedBuildLevelState));
     634 GNC          13 :             parent->pages[0] = (Page) palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
     635 CBC          13 :             parent->parent = NULL;
     636              13 :             gistinitpage(parent->pages[0], 0);
     637 ECB             : 
     638 CBC          13 :             levelstate->parent = parent;
     639                 :         }
     640             699 :         gist_indexsortbuild_levelstate_add(state, parent, union_tuple);
     641                 :     }
     642             140 : }
     643                 : 
     644 ECB             : static void
     645 GIC          87 : gist_indexsortbuild_flush_ready_pages(GISTBuildState *state)
     646                 : {
     647 CBC          87 :     if (state->ready_num_pages == 0)
     648 GIC          56 :         return;
     649 ECB             : 
     650 CBC         730 :     for (int i = 0; i < state->ready_num_pages; i++)
     651                 :     {
     652             699 :         Page        page = state->ready_pages[i];
     653 GIC         699 :         BlockNumber blkno = state->ready_blknos[i];
     654 ECB             : 
     655                 :         /* Currently, the blocks must be buffered in order. */
     656 GIC         699 :         if (blkno != state->pages_written)
     657 UIC           0 :             elog(ERROR, "unexpected block number to flush GiST sorting build");
     658 ECB             : 
     659 GBC         699 :         PageSetLSN(page, GistBuildLSN);
     660 GIC         699 :         PageSetChecksumInplace(page, blkno);
     661 CBC         699 :         smgrextend(RelationGetSmgr(state->indexrel), MAIN_FORKNUM, blkno, page,
     662 ECB             :                    true);
     663                 : 
     664 GIC         699 :         state->pages_written++;
     665                 :     }
     666 ECB             : 
     667 GIC          31 :     if (RelationNeedsWAL(state->indexrel))
     668 GNC          19 :         log_newpages(&state->indexrel->rd_locator, MAIN_FORKNUM, state->ready_num_pages,
     669 CBC          19 :                      state->ready_blknos, state->ready_pages, true);
     670 ECB             : 
     671 CBC         730 :     for (int i = 0; i < state->ready_num_pages; i++)
     672 GIC         699 :         pfree(state->ready_pages[i]);
     673 ECB             : 
     674 CBC          31 :     state->ready_num_pages = 0;
     675                 : }
     676 ECB             : 
     677                 : 
     678                 : /*-------------------------------------------------------------------------
     679                 :  * Routines for non-sorted build
     680                 :  *-------------------------------------------------------------------------
     681                 :  */
     682                 : 
     683                 : /*
     684                 :  * Attempt to switch to buffering mode.
     685                 :  *
     686                 :  * If there is not enough memory for buffering build, sets bufferingMode
     687                 :  * to GIST_BUFFERING_DISABLED, so that we don't bother to try the switch
     688                 :  * anymore. Otherwise initializes the build buffers, and sets bufferingMode to
     689                 :  * GIST_BUFFERING_ACTIVE.
     690                 :  */
     691                 : static void
     692 GIC           3 : gistInitBuffering(GISTBuildState *buildstate)
     693                 : {
     694 CBC           3 :     Relation    index = buildstate->indexrel;
     695                 :     int         pagesPerBuffer;
     696 ECB             :     Size        pageFreeSpace;
     697                 :     Size        itupAvgSize,
     698                 :                 itupMinSize;
     699                 :     double      avgIndexTuplesPerPage,
     700                 :                 maxIndexTuplesPerPage;
     701                 :     int         i;
     702                 :     int         levelStep;
     703                 : 
     704                 :     /* Calc space of index page which is available for index tuples */
     705 GIC           3 :     pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
     706                 :         - sizeof(ItemIdData)
     707 CBC           3 :         - buildstate->freespace;
     708                 : 
     709 ECB             :     /*
     710                 :      * Calculate average size of already inserted index tuples using gathered
     711                 :      * statistics.
     712                 :      */
     713 GIC           3 :     itupAvgSize = (double) buildstate->indtuplesSize /
     714               3 :         (double) buildstate->indtuples;
     715 ECB             : 
     716                 :     /*
     717                 :      * Calculate minimal possible size of index tuple by index metadata.
     718                 :      * Minimal possible size of varlena is VARHDRSZ.
     719                 :      *
     720                 :      * XXX: that's not actually true, as a short varlen can be just 2 bytes.
     721                 :      * And we should take padding into account here.
     722                 :      */
     723 GIC           3 :     itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData));
     724               6 :     for (i = 0; i < index->rd_att->natts; i++)
     725 ECB             :     {
     726 CBC           3 :         if (TupleDescAttr(index->rd_att, i)->attlen < 0)
     727 UIC           0 :             itupMinSize += VARHDRSZ;
     728 ECB             :         else
     729 GBC           3 :             itupMinSize += TupleDescAttr(index->rd_att, i)->attlen;
     730                 :     }
     731 ECB             : 
     732                 :     /* Calculate average and maximal number of index tuples which fit to page */
     733 GIC           3 :     avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
     734               3 :     maxIndexTuplesPerPage = pageFreeSpace / itupMinSize;
     735 ECB             : 
     736                 :     /*
     737                 :      * We need to calculate two parameters for the buffering algorithm:
     738                 :      * levelStep and pagesPerBuffer.
     739                 :      *
     740                 :      * levelStep determines the size of subtree that we operate on, while
     741                 :      * emptying a buffer. A higher value is better, as you need fewer buffer
     742                 :      * emptying steps to build the index. However, if you set it too high, the
     743                 :      * subtree doesn't fit in cache anymore, and you quickly lose the benefit
     744                 :      * of the buffers.
     745                 :      *
     746                 :      * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is
     747                 :      * the number of tuples on page (ie. fanout), and M is the amount of
     748                 :      * internal memory available. Curiously, they doesn't explain *why* that
     749                 :      * setting is optimal. We calculate it by taking the highest levelStep so
     750                 :      * that a subtree still fits in cache. For a small B, our way of
     751                 :      * calculating levelStep is very close to Arge et al's formula. For a
     752                 :      * large B, our formula gives a value that is 2x higher.
     753                 :      *
     754                 :      * The average size (in pages) of a subtree of depth n can be calculated
     755                 :      * as a geometric series:
     756                 :      *
     757                 :      * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B)
     758                 :      *
     759                 :      * where B is the average number of index tuples on page. The subtree is
     760                 :      * cached in the shared buffer cache and the OS cache, so we choose
     761                 :      * levelStep so that the subtree size is comfortably smaller than
     762                 :      * effective_cache_size, with a safety factor of 4.
     763                 :      *
     764                 :      * The estimate on the average number of index tuples on page is based on
     765                 :      * average tuple sizes observed before switching to buffered build, so the
     766                 :      * real subtree size can be somewhat larger. Also, it would selfish to
     767                 :      * gobble the whole cache for our index build. The safety factor of 4
     768                 :      * should account for those effects.
     769                 :      *
     770                 :      * The other limiting factor for setting levelStep is that while
     771                 :      * processing a subtree, we need to hold one page for each buffer at the
     772                 :      * next lower buffered level. The max. number of buffers needed for that
     773                 :      * is maxIndexTuplesPerPage^levelStep. This is very conservative, but
     774                 :      * hopefully maintenance_work_mem is set high enough that you're
     775                 :      * constrained by effective_cache_size rather than maintenance_work_mem.
     776                 :      *
     777                 :      * XXX: the buffer hash table consumes a fair amount of memory too per
     778                 :      * buffer, but that is not currently taken into account. That scales on
     779                 :      * the total number of buffers used, ie. the index size and on levelStep.
     780                 :      * Note that a higher levelStep *reduces* the amount of memory needed for
     781                 :      * the hash table.
     782                 :      */
     783 GIC           3 :     levelStep = 1;
     784                 :     for (;;)
     785 CBC           3 :     {
     786                 :         double      subtreesize;
     787 ECB             :         double      maxlowestlevelpages;
     788                 : 
     789                 :         /* size of an average subtree at this levelStep (in pages). */
     790 GIC           6 :         subtreesize =
     791               6 :             (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) /
     792 CBC           6 :             (1 - avgIndexTuplesPerPage);
     793 ECB             : 
     794                 :         /* max number of pages at the lowest level of a subtree */
     795 GIC           6 :         maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep);
     796                 : 
     797 ECB             :         /* subtree must fit in cache (with safety factor of 4) */
     798 GIC           6 :         if (subtreesize > effective_cache_size / 4)
     799 UIC           0 :             break;
     800 ECB             : 
     801 EUB             :         /* each node in the lowest level of a subtree has one page in memory */
     802 GIC           6 :         if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ)
     803               3 :             break;
     804 ECB             : 
     805                 :         /* Good, we can handle this levelStep. See if we can go one higher. */
     806 GIC           3 :         levelStep++;
     807                 :     }
     808 ECB             : 
     809                 :     /*
     810                 :      * We just reached an unacceptable value of levelStep in previous loop.
     811                 :      * So, decrease levelStep to get last acceptable value.
     812                 :      */
     813 GIC           3 :     levelStep--;
     814                 : 
     815 ECB             :     /*
     816                 :      * If there's not enough cache or maintenance_work_mem, fall back to plain
     817                 :      * inserts.
     818                 :      */
     819 GIC           3 :     if (levelStep <= 0)
     820                 :     {
     821 LBC           0 :         elog(DEBUG1, "failed to switch to buffered GiST build");
     822 UIC           0 :         buildstate->buildMode = GIST_BUFFERING_DISABLED;
     823 UBC           0 :         return;
     824 EUB             :     }
     825                 : 
     826                 :     /*
     827                 :      * The second parameter to set is pagesPerBuffer, which determines the
     828                 :      * size of each buffer. We adjust pagesPerBuffer also during the build,
     829                 :      * which is why this calculation is in a separate function.
     830                 :      */
     831 GIC           3 :     pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep);
     832                 : 
     833 ECB             :     /* Initialize GISTBuildBuffers with these parameters */
     834 GIC           3 :     buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep,
     835                 :                                             gistGetMaxLevel(index));
     836 ECB             : 
     837 GIC           3 :     gistInitParentMap(buildstate);
     838                 : 
     839 CBC           3 :     buildstate->buildMode = GIST_BUFFERING_ACTIVE;
     840                 : 
     841               3 :     elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
     842                 :          levelStep, pagesPerBuffer);
     843 ECB             : }
     844                 : 
     845                 : /*
     846                 :  * Calculate pagesPerBuffer parameter for the buffering algorithm.
     847                 :  *
     848                 :  * Buffer size is chosen so that assuming that tuples are distributed
     849                 :  * randomly, emptying half a buffer fills on average one page in every buffer
     850                 :  * at the next lower level.
     851                 :  */
     852                 : static int
     853 GIC           6 : calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep)
     854                 : {
     855 ECB             :     double      pagesPerBuffer;
     856                 :     double      avgIndexTuplesPerPage;
     857                 :     double      itupAvgSize;
     858                 :     Size        pageFreeSpace;
     859                 : 
     860                 :     /* Calc space of index page which is available for index tuples */
     861 GIC           6 :     pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData)
     862                 :         - sizeof(ItemIdData)
     863 CBC           6 :         - buildstate->freespace;
     864                 : 
     865 ECB             :     /*
     866                 :      * Calculate average size of already inserted index tuples using gathered
     867                 :      * statistics.
     868                 :      */
     869 GIC           6 :     itupAvgSize = (double) buildstate->indtuplesSize /
     870               6 :         (double) buildstate->indtuples;
     871 ECB             : 
     872 CBC           6 :     avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize;
     873                 : 
     874 ECB             :     /*
     875                 :      * Recalculate required size of buffers.
     876                 :      */
     877 GIC           6 :     pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep);
     878                 : 
     879 CBC           6 :     return (int) rint(pagesPerBuffer);
     880                 : }
     881 ECB             : 
     882                 : /*
     883                 :  * Per-tuple callback for table_index_build_scan.
     884                 :  */
     885                 : static void
     886 GIC      321974 : gistBuildCallback(Relation index,
     887                 :                   ItemPointer tid,
     888 ECB             :                   Datum *values,
     889                 :                   bool *isnull,
     890                 :                   bool tupleIsAlive,
     891                 :                   void *state)
     892                 : {
     893 GIC      321974 :     GISTBuildState *buildstate = (GISTBuildState *) state;
     894                 :     IndexTuple  itup;
     895 ECB             :     MemoryContext oldCtx;
     896                 : 
     897 GIC      321974 :     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
     898                 : 
     899 ECB             :     /* form an index tuple and point it at the heap tuple */
     900 GIC      321974 :     itup = gistFormTuple(buildstate->giststate, index,
     901                 :                          values, isnull,
     902 ECB             :                          true);
     903 GIC      321974 :     itup->t_tid = *tid;
     904                 : 
     905 ECB             :     /* Update tuple count and total size. */
     906 GIC      321974 :     buildstate->indtuples += 1;
     907          321974 :     buildstate->indtuplesSize += IndexTupleSize(itup);
     908 ECB             : 
     909                 :     /*
     910                 :      * XXX In buffering builds, the tempCxt is also reset down inside
     911                 :      * gistProcessEmptyingQueue().  This is not great because it risks
     912                 :      * confusion and possible use of dangling pointers (for example, itup
     913                 :      * might be already freed when control returns here).  It's generally
     914                 :      * better that a memory context be "owned" by only one function.  However,
     915                 :      * currently this isn't causing issues so it doesn't seem worth the amount
     916                 :      * of refactoring that would be needed to avoid it.
     917                 :      */
     918 GIC      321974 :     if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
     919                 :     {
     920 ECB             :         /* We have buffers, so use them. */
     921 GIC       17715 :         gistBufferingBuildInsert(buildstate, itup);
     922                 :     }
     923 ECB             :     else
     924                 :     {
     925                 :         /*
     926                 :          * There's no buffers (yet). Since we already have the index relation
     927                 :          * locked, we call gistdoinsert directly.
     928                 :          */
     929 GIC      304259 :         gistdoinsert(index, itup, buildstate->freespace,
     930                 :                      buildstate->giststate, buildstate->heaprel, true);
     931 ECB             :     }
     932                 : 
     933 GIC      321974 :     MemoryContextSwitchTo(oldCtx);
     934          321974 :     MemoryContextReset(buildstate->giststate->tempCxt);
     935 ECB             : 
     936 CBC      321974 :     if (buildstate->buildMode == GIST_BUFFERING_ACTIVE &&
     937 GIC       17715 :         buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
     938 ECB             :     {
     939                 :         /* Adjust the target buffer size now */
     940 GIC           3 :         buildstate->gfbb->pagesPerBuffer =
     941               3 :             calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep);
     942 ECB             :     }
     943                 : 
     944                 :     /*
     945                 :      * In 'auto' mode, check if the index has grown too large to fit in cache,
     946                 :      * and switch to buffering mode if it has.
     947                 :      *
     948                 :      * To avoid excessive calls to smgrnblocks(), only check this every
     949                 :      * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples.
     950                 :      *
     951                 :      * In 'stats' state, switch as soon as we have seen enough tuples to have
     952                 :      * some idea of the average tuple size.
     953                 :      */
     954 GIC      321974 :     if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
     955          291971 :          buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
     956 CBC        1098 :          effective_cache_size < smgrnblocks(RelationGetSmgr(index),
     957          321974 :                                             MAIN_FORKNUM)) ||
     958          321974 :         (buildstate->buildMode == GIST_BUFFERING_STATS &&
     959           12288 :          buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
     960 ECB             :     {
     961                 :         /*
     962                 :          * Index doesn't fit in effective cache anymore. Try to switch to
     963                 :          * buffering build mode.
     964                 :          */
     965 GIC           3 :         gistInitBuffering(buildstate);
     966                 :     }
     967 CBC      321974 : }
     968                 : 
     969 ECB             : /*
     970                 :  * Insert function for buffering index build.
     971                 :  */
     972                 : static void
     973 GIC       17715 : gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup)
     974                 : {
     975 ECB             :     /* Insert the tuple to buffers. */
     976 GIC       17715 :     gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel);
     977                 : 
     978 ECB             :     /* If we filled up (half of a) buffer, process buffer emptying. */
     979 GIC       17715 :     gistProcessEmptyingQueue(buildstate);
     980           17715 : }
     981 ECB             : 
     982                 : /*
     983                 :  * Process an index tuple. Runs the tuple down the tree until we reach a leaf
     984                 :  * page or node buffer, and inserts the tuple there. Returns true if we have
     985                 :  * to stop buffer emptying process (because one of child buffers can't take
     986                 :  * index tuples anymore).
     987                 :  */
     988                 : static bool
     989 GIC       34881 : gistProcessItup(GISTBuildState *buildstate, IndexTuple itup,
     990                 :                 BlockNumber startblkno, int startlevel)
     991 ECB             : {
     992 GIC       34881 :     GISTSTATE  *giststate = buildstate->giststate;
     993           34881 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
     994 CBC       34881 :     Relation    indexrel = buildstate->indexrel;
     995 ECB             :     BlockNumber childblkno;
     996                 :     Buffer      buffer;
     997 GIC       34881 :     bool        result = false;
     998                 :     BlockNumber blkno;
     999 ECB             :     int         level;
    1000 GIC       34881 :     OffsetNumber downlinkoffnum = InvalidOffsetNumber;
    1001           34881 :     BlockNumber parentblkno = InvalidBlockNumber;
    1002 ECB             : 
    1003 CBC       34881 :     CHECK_FOR_INTERRUPTS();
    1004                 : 
    1005 ECB             :     /*
    1006                 :      * Loop until we reach a leaf page (level == 0) or a level with buffers
    1007                 :      * (not including the level we start at, because we would otherwise make
    1008                 :      * no progress).
    1009                 :      */
    1010 GIC       34881 :     blkno = startblkno;
    1011           34881 :     level = startlevel;
    1012 ECB             :     for (;;)
    1013 CBC       34881 :     {
    1014                 :         ItemId      iid;
    1015 ECB             :         IndexTuple  idxtuple,
    1016                 :                     newtup;
    1017                 :         Page        page;
    1018                 :         OffsetNumber childoffnum;
    1019                 : 
    1020                 :         /* Have we reached a level with buffers? */
    1021 GIC       69762 :         if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel)
    1022           17166 :             break;
    1023 ECB             : 
    1024                 :         /* Have we reached a leaf page? */
    1025 GIC       52596 :         if (level == 0)
    1026           17715 :             break;
    1027 ECB             : 
    1028                 :         /*
    1029                 :          * Nope. Descend down to the next level then. Choose a child to
    1030                 :          * descend down to.
    1031                 :          */
    1032                 : 
    1033 GIC       34881 :         buffer = ReadBuffer(indexrel, blkno);
    1034           34881 :         LockBuffer(buffer, GIST_EXCLUSIVE);
    1035 ECB             : 
    1036 CBC       34881 :         page = (Page) BufferGetPage(buffer);
    1037 GIC       34881 :         childoffnum = gistchoose(indexrel, page, itup, giststate);
    1038 CBC       34881 :         iid = PageGetItemId(page, childoffnum);
    1039           34881 :         idxtuple = (IndexTuple) PageGetItem(page, iid);
    1040           34881 :         childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
    1041 ECB             : 
    1042 CBC       34881 :         if (level > 1)
    1043 GIC       17166 :             gistMemorizeParent(buildstate, childblkno, blkno);
    1044 ECB             : 
    1045                 :         /*
    1046                 :          * Check that the key representing the target child node is consistent
    1047                 :          * with the key we're inserting. Update it if it's not.
    1048                 :          */
    1049 GIC       34881 :         newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate);
    1050           34881 :         if (newtup)
    1051 ECB             :         {
    1052 CBC       34473 :             blkno = gistbufferinginserttuples(buildstate,
    1053                 :                                               buffer,
    1054 ECB             :                                               level,
    1055                 :                                               &newtup,
    1056                 :                                               1,
    1057                 :                                               childoffnum,
    1058                 :                                               InvalidBlockNumber,
    1059                 :                                               InvalidOffsetNumber);
    1060                 :             /* gistbufferinginserttuples() released the buffer */
    1061                 :         }
    1062                 :         else
    1063 GIC         408 :             UnlockReleaseBuffer(buffer);
    1064                 : 
    1065 ECB             :         /* Descend to the child */
    1066 GIC       34881 :         parentblkno = blkno;
    1067           34881 :         blkno = childblkno;
    1068 CBC       34881 :         downlinkoffnum = childoffnum;
    1069           34881 :         Assert(level > 0);
    1070           34881 :         level--;
    1071 ECB             :     }
    1072                 : 
    1073 GIC       34881 :     if (LEVEL_HAS_BUFFERS(level, gfbb))
    1074           17166 :     {
    1075 ECB             :         /*
    1076                 :          * We've reached level with buffers. Place the index tuple to the
    1077                 :          * buffer, and add the buffer to the emptying queue if it overflows.
    1078                 :          */
    1079                 :         GISTNodeBuffer *childNodeBuffer;
    1080                 : 
    1081                 :         /* Find the buffer or create a new one */
    1082 GIC       17166 :         childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level);
    1083                 : 
    1084 ECB             :         /* Add index tuple to it */
    1085 GIC       17166 :         gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup);
    1086                 : 
    1087 CBC       17166 :         if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb))
    1088 UIC           0 :             result = true;
    1089 ECB             :     }
    1090 EUB             :     else
    1091                 :     {
    1092                 :         /*
    1093                 :          * We've reached a leaf page. Place the tuple here.
    1094                 :          */
    1095 GIC       17715 :         Assert(level == 0);
    1096           17715 :         buffer = ReadBuffer(indexrel, blkno);
    1097 CBC       17715 :         LockBuffer(buffer, GIST_EXCLUSIVE);
    1098           17715 :         gistbufferinginserttuples(buildstate, buffer, level,
    1099 ECB             :                                   &itup, 1, InvalidOffsetNumber,
    1100                 :                                   parentblkno, downlinkoffnum);
    1101                 :         /* gistbufferinginserttuples() released the buffer */
    1102                 :     }
    1103                 : 
    1104 GIC       34881 :     return result;
    1105                 : }
    1106 ECB             : 
    1107                 : /*
    1108                 :  * Insert tuples to a given page.
    1109                 :  *
    1110                 :  * This is analogous with gistinserttuples() in the regular insertion code.
    1111                 :  *
    1112                 :  * Returns the block number of the page where the (first) new or updated tuple
    1113                 :  * was inserted. Usually that's the original page, but might be a sibling page
    1114                 :  * if the original page was split.
    1115                 :  *
    1116                 :  * Caller should hold a lock on 'buffer' on entry. This function will unlock
    1117                 :  * and unpin it.
    1118                 :  */
    1119                 : static BlockNumber
    1120 GIC       52572 : gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level,
    1121                 :                           IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
    1122 ECB             :                           BlockNumber parentblk, OffsetNumber downlinkoffnum)
    1123                 : {
    1124 GIC       52572 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
    1125                 :     List       *splitinfo;
    1126 ECB             :     bool        is_split;
    1127 GIC       52572 :     BlockNumber placed_to_blk = InvalidBlockNumber;
    1128                 : 
    1129 CBC       52572 :     is_split = gistplacetopage(buildstate->indexrel,
    1130                 :                                buildstate->freespace,
    1131 ECB             :                                buildstate->giststate,
    1132                 :                                buffer,
    1133                 :                                itup, ntup, oldoffnum, &placed_to_blk,
    1134                 :                                InvalidBuffer,
    1135                 :                                &splitinfo,
    1136                 :                                false,
    1137                 :                                buildstate->heaprel, true);
    1138                 : 
    1139                 :     /*
    1140                 :      * If this is a root split, update the root path item kept in memory. This
    1141                 :      * ensures that all path stacks are always complete, including all parent
    1142                 :      * nodes up to the root. That simplifies the algorithm to re-find correct
    1143                 :      * parent.
    1144                 :      */
    1145 GIC       52572 :     if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO)
    1146                 :     {
    1147 CBC           3 :         Page        page = BufferGetPage(buffer);
    1148                 :         OffsetNumber off;
    1149 ECB             :         OffsetNumber maxoff;
    1150                 : 
    1151 GIC           3 :         Assert(level == gfbb->rootlevel);
    1152               3 :         gfbb->rootlevel++;
    1153 ECB             : 
    1154 CBC           3 :         elog(DEBUG2, "splitting GiST root page, now %d levels deep", gfbb->rootlevel);
    1155                 : 
    1156 ECB             :         /*
    1157                 :          * All the downlinks on the old root page are now on one of the child
    1158                 :          * pages. Visit all the new child pages to memorize the parents of the
    1159                 :          * grandchildren.
    1160                 :          */
    1161 GIC           3 :         if (gfbb->rootlevel > 1)
    1162                 :         {
    1163 CBC           3 :             maxoff = PageGetMaxOffsetNumber(page);
    1164 GIC           9 :             for (off = FirstOffsetNumber; off <= maxoff; off++)
    1165 ECB             :             {
    1166 CBC           6 :                 ItemId      iid = PageGetItemId(page, off);
    1167 GIC           6 :                 IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
    1168 CBC           6 :                 BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
    1169               6 :                 Buffer      childbuf = ReadBuffer(buildstate->indexrel, childblkno);
    1170 ECB             : 
    1171 CBC           6 :                 LockBuffer(childbuf, GIST_SHARE);
    1172 GIC           6 :                 gistMemorizeAllDownlinks(buildstate, childbuf);
    1173 CBC           6 :                 UnlockReleaseBuffer(childbuf);
    1174 ECB             : 
    1175                 :                 /*
    1176                 :                  * Also remember that the parent of the new child page is the
    1177                 :                  * root block.
    1178                 :                  */
    1179 GIC           6 :                 gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO);
    1180                 :             }
    1181 ECB             :         }
    1182                 :     }
    1183                 : 
    1184 GIC       52572 :     if (splitinfo)
    1185                 :     {
    1186 ECB             :         /*
    1187                 :          * Insert the downlinks to the parent. This is analogous with
    1188                 :          * gistfinishsplit() in the regular insertion code, but the locking is
    1189                 :          * simpler, and we have to maintain the buffers on internal nodes and
    1190                 :          * the parent map.
    1191                 :          */
    1192                 :         IndexTuple *downlinks;
    1193                 :         int         ndownlinks,
    1194                 :                     i;
    1195                 :         Buffer      parentBuffer;
    1196                 :         ListCell   *lc;
    1197                 : 
    1198                 :         /* Parent may have changed since we memorized this path. */
    1199                 :         parentBuffer =
    1200 GIC         384 :             gistBufferingFindCorrectParent(buildstate,
    1201                 :                                            BufferGetBlockNumber(buffer),
    1202 ECB             :                                            level,
    1203                 :                                            &parentblk,
    1204                 :                                            &downlinkoffnum);
    1205                 : 
    1206                 :         /*
    1207                 :          * If there's a buffer associated with this page, that needs to be
    1208                 :          * split too. gistRelocateBuildBuffersOnSplit() will also adjust the
    1209                 :          * downlinks in 'splitinfo', to make sure they're consistent not only
    1210                 :          * with the tuples already on the pages, but also the tuples in the
    1211                 :          * buffers that will eventually be inserted to them.
    1212                 :          */
    1213 GIC         384 :         gistRelocateBuildBuffersOnSplit(gfbb,
    1214                 :                                         buildstate->giststate,
    1215 ECB             :                                         buildstate->indexrel,
    1216                 :                                         level,
    1217                 :                                         buffer, splitinfo);
    1218                 : 
    1219                 :         /* Create an array of all the downlink tuples */
    1220 GIC         384 :         ndownlinks = list_length(splitinfo);
    1221             384 :         downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks);
    1222 CBC         384 :         i = 0;
    1223            1152 :         foreach(lc, splitinfo)
    1224 ECB             :         {
    1225 CBC         768 :             GISTPageSplitInfo *splitinfo = lfirst(lc);
    1226                 : 
    1227 ECB             :             /*
    1228                 :              * Remember the parent of each new child page in our parent map.
    1229                 :              * This assumes that the downlinks fit on the parent page. If the
    1230                 :              * parent page is split, too, when we recurse up to insert the
    1231                 :              * downlinks, the recursive gistbufferinginserttuples() call will
    1232                 :              * update the map again.
    1233                 :              */
    1234 GIC         768 :             if (level > 0)
    1235              12 :                 gistMemorizeParent(buildstate,
    1236 ECB             :                                    BufferGetBlockNumber(splitinfo->buf),
    1237                 :                                    BufferGetBlockNumber(parentBuffer));
    1238                 : 
    1239                 :             /*
    1240                 :              * Also update the parent map for all the downlinks that got moved
    1241                 :              * to a different page. (actually this also loops through the
    1242                 :              * downlinks that stayed on the original page, but it does no
    1243                 :              * harm).
    1244                 :              */
    1245 GIC         768 :             if (level > 1)
    1246 UIC           0 :                 gistMemorizeAllDownlinks(buildstate, splitinfo->buf);
    1247 ECB             : 
    1248 EUB             :             /*
    1249                 :              * Since there's no concurrent access, we can release the lower
    1250                 :              * level buffers immediately. This includes the original page.
    1251                 :              */
    1252 GIC         768 :             UnlockReleaseBuffer(splitinfo->buf);
    1253             768 :             downlinks[i++] = splitinfo->downlink;
    1254 ECB             :         }
    1255                 : 
    1256                 :         /* Insert them into parent. */
    1257 GIC         384 :         gistbufferinginserttuples(buildstate, parentBuffer, level + 1,
    1258                 :                                   downlinks, ndownlinks, downlinkoffnum,
    1259 ECB             :                                   InvalidBlockNumber, InvalidOffsetNumber);
    1260                 : 
    1261 GIC         384 :         list_free_deep(splitinfo);  /* we don't need this anymore */
    1262                 :     }
    1263 ECB             :     else
    1264 GIC       52188 :         UnlockReleaseBuffer(buffer);
    1265                 : 
    1266 CBC       52572 :     return placed_to_blk;
    1267                 : }
    1268 ECB             : 
    1269                 : /*
    1270                 :  * Find the downlink pointing to a child page.
    1271                 :  *
    1272                 :  * 'childblkno' indicates the child page to find the parent for. 'level' is
    1273                 :  * the level of the child. On entry, *parentblkno and *downlinkoffnum can
    1274                 :  * point to a location where the downlink used to be - we will check that
    1275                 :  * location first, and save some cycles if it hasn't moved. The function
    1276                 :  * returns a buffer containing the downlink, exclusively-locked, and
    1277                 :  * *parentblkno and *downlinkoffnum are set to the real location of the
    1278                 :  * downlink.
    1279                 :  *
    1280                 :  * If the child page is a leaf (level == 0), the caller must supply a correct
    1281                 :  * parentblkno. Otherwise we use the parent map hash table to find the parent
    1282                 :  * block.
    1283                 :  *
    1284                 :  * This function serves the same purpose as gistFindCorrectParent() during
    1285                 :  * normal index inserts, but this is simpler because we don't need to deal
    1286                 :  * with concurrent inserts.
    1287                 :  */
    1288                 : static Buffer
    1289 GIC         384 : gistBufferingFindCorrectParent(GISTBuildState *buildstate,
    1290                 :                                BlockNumber childblkno, int level,
    1291 ECB             :                                BlockNumber *parentblkno,
    1292                 :                                OffsetNumber *downlinkoffnum)
    1293                 : {
    1294                 :     BlockNumber parent;
    1295                 :     Buffer      buffer;
    1296                 :     Page        page;
    1297                 :     OffsetNumber maxoff;
    1298                 :     OffsetNumber off;
    1299                 : 
    1300 GIC         384 :     if (level > 0)
    1301               6 :         parent = gistGetParent(buildstate, childblkno);
    1302 ECB             :     else
    1303                 :     {
    1304                 :         /*
    1305                 :          * For a leaf page, the caller must supply a correct parent block
    1306                 :          * number.
    1307                 :          */
    1308 GIC         378 :         if (*parentblkno == InvalidBlockNumber)
    1309 UIC           0 :             elog(ERROR, "no parent buffer provided of child %u", childblkno);
    1310 CBC         378 :         parent = *parentblkno;
    1311 EUB             :     }
    1312 ECB             : 
    1313 GIC         384 :     buffer = ReadBuffer(buildstate->indexrel, parent);
    1314             384 :     page = BufferGetPage(buffer);
    1315 CBC         384 :     LockBuffer(buffer, GIST_EXCLUSIVE);
    1316             384 :     gistcheckpage(buildstate->indexrel, buffer);
    1317             384 :     maxoff = PageGetMaxOffsetNumber(page);
    1318 ECB             : 
    1319                 :     /* Check if it was not moved */
    1320 GIC         384 :     if (parent == *parentblkno && *parentblkno != InvalidBlockNumber &&
    1321             378 :         *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff)
    1322 ECB             :     {
    1323 CBC         378 :         ItemId      iid = PageGetItemId(page, *downlinkoffnum);
    1324 GIC         378 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
    1325 ECB             : 
    1326 CBC         378 :         if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
    1327                 :         {
    1328 ECB             :             /* Still there */
    1329 GIC         378 :             return buffer;
    1330                 :         }
    1331 ECB             :     }
    1332                 : 
    1333                 :     /*
    1334                 :      * Downlink was not at the offset where it used to be. Scan the page to
    1335                 :      * find it. During normal gist insertions, it might've moved to another
    1336                 :      * page, to the right, but during a buffering build, we keep track of the
    1337                 :      * parent of each page in the lookup table so we should always know what
    1338                 :      * page it's on.
    1339                 :      */
    1340 GIC          15 :     for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off))
    1341                 :     {
    1342 CBC          15 :         ItemId      iid = PageGetItemId(page, off);
    1343 GIC          15 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
    1344 ECB             : 
    1345 CBC          15 :         if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno)
    1346                 :         {
    1347 ECB             :             /* yes!!, found it */
    1348 GIC           6 :             *downlinkoffnum = off;
    1349               6 :             return buffer;
    1350 ECB             :         }
    1351                 :     }
    1352                 : 
    1353 UIC           0 :     elog(ERROR, "failed to re-find parent for block %u", childblkno);
    1354                 :     return InvalidBuffer;       /* keep compiler quiet */
    1355 EUB             : }
    1356                 : 
    1357                 : /*
    1358                 :  * Process buffers emptying stack. Emptying of one buffer can cause emptying
    1359                 :  * of other buffers. This function iterates until this cascading emptying
    1360                 :  * process finished, e.g. until buffers emptying stack is empty.
    1361                 :  */
    1362                 : static void
    1363 GIC       17724 : gistProcessEmptyingQueue(GISTBuildState *buildstate)
    1364                 : {
    1365 CBC       17724 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
    1366                 : 
    1367 ECB             :     /* Iterate while we have elements in buffers emptying stack. */
    1368 GIC       17733 :     while (gfbb->bufferEmptyingQueue != NIL)
    1369                 :     {
    1370 ECB             :         GISTNodeBuffer *emptyingNodeBuffer;
    1371                 : 
    1372                 :         /* Get node buffer from emptying stack. */
    1373 GIC           9 :         emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue);
    1374               9 :         gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue);
    1375 CBC           9 :         emptyingNodeBuffer->queuedForEmptying = false;
    1376 ECB             : 
    1377                 :         /*
    1378                 :          * We are going to load last pages of buffers where emptying will be
    1379                 :          * to. So let's unload any previously loaded buffers.
    1380                 :          */
    1381 GIC           9 :         gistUnloadNodeBuffers(gfbb);
    1382                 : 
    1383 ECB             :         /*
    1384                 :          * Pop tuples from the buffer and run them down to the buffers at
    1385                 :          * lower level, or leaf pages. We continue until one of the lower
    1386                 :          * level buffers fills up, or this buffer runs empty.
    1387                 :          *
    1388                 :          * In Arge et al's paper, the buffer emptying is stopped after
    1389                 :          * processing 1/2 node buffer worth of tuples, to avoid overfilling
    1390                 :          * any of the lower level buffers. However, it's more efficient to
    1391                 :          * keep going until one of the lower level buffers actually fills up,
    1392                 :          * so that's what we do. This doesn't need to be exact, if a buffer
    1393                 :          * overfills by a few tuples, there's no harm done.
    1394                 :          */
    1395                 :         while (true)
    1396 GIC       17166 :         {
    1397                 :             IndexTuple  itup;
    1398 ECB             : 
    1399                 :             /* Get next index tuple from the buffer */
    1400 GIC       17175 :             if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup))
    1401               9 :                 break;
    1402 ECB             : 
    1403                 :             /*
    1404                 :              * Run it down to the underlying node buffer or leaf page.
    1405                 :              *
    1406                 :              * Note: it's possible that the buffer we're emptying splits as a
    1407                 :              * result of this call. If that happens, our emptyingNodeBuffer
    1408                 :              * points to the left half of the split. After split, it's very
    1409                 :              * likely that the new left buffer is no longer over the half-full
    1410                 :              * threshold, but we might as well keep flushing tuples from it
    1411                 :              * until we fill a lower-level buffer.
    1412                 :              */
    1413 GIC       17166 :             if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level))
    1414                 :             {
    1415 ECB             :                 /*
    1416                 :                  * A lower level buffer filled up. Stop emptying this buffer,
    1417                 :                  * to avoid overflowing the lower level buffer.
    1418                 :                  */
    1419 UIC           0 :                 break;
    1420                 :             }
    1421 EUB             : 
    1422                 :             /* Free all the memory allocated during index tuple processing */
    1423 GIC       17166 :             MemoryContextReset(buildstate->giststate->tempCxt);
    1424                 :         }
    1425 ECB             :     }
    1426 GIC       17724 : }
    1427                 : 
    1428 ECB             : /*
    1429                 :  * Empty all node buffers, from top to bottom. This is done at the end of
    1430                 :  * index build to flush all remaining tuples to the index.
    1431                 :  *
    1432                 :  * Note: This destroys the buffersOnLevels lists, so the buffers should not
    1433                 :  * be inserted to after this call.
    1434                 :  */
    1435                 : static void
    1436 GIC           3 : gistEmptyAllBuffers(GISTBuildState *buildstate)
    1437                 : {
    1438 CBC           3 :     GISTBuildBuffers *gfbb = buildstate->gfbb;
    1439                 :     MemoryContext oldCtx;
    1440 ECB             :     int         i;
    1441                 : 
    1442 GIC           3 :     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
    1443                 : 
    1444 ECB             :     /*
    1445                 :      * Iterate through the levels from top to bottom.
    1446                 :      */
    1447 GIC           9 :     for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--)
    1448                 :     {
    1449 ECB             :         /*
    1450                 :          * Empty all buffers on this level. Note that new buffers can pop up
    1451                 :          * in the list during the processing, as a result of page splits, so a
    1452                 :          * simple walk through the list won't work. We remove buffers from the
    1453                 :          * list when we see them empty; a buffer can't become non-empty once
    1454                 :          * it's been fully emptied.
    1455                 :          */
    1456 GIC          24 :         while (gfbb->buffersOnLevels[i] != NIL)
    1457                 :         {
    1458 ECB             :             GISTNodeBuffer *nodeBuffer;
    1459                 : 
    1460 GIC          18 :             nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]);
    1461                 : 
    1462 CBC          18 :             if (nodeBuffer->blocksCount != 0)
    1463                 :             {
    1464 ECB             :                 /*
    1465                 :                  * Add this buffer to the emptying queue, and proceed to empty
    1466                 :                  * the queue.
    1467                 :                  */
    1468 GIC           9 :                 if (!nodeBuffer->queuedForEmptying)
    1469                 :                 {
    1470 CBC           9 :                     MemoryContextSwitchTo(gfbb->context);
    1471 GIC           9 :                     nodeBuffer->queuedForEmptying = true;
    1472 CBC           9 :                     gfbb->bufferEmptyingQueue =
    1473               9 :                         lcons(nodeBuffer, gfbb->bufferEmptyingQueue);
    1474               9 :                     MemoryContextSwitchTo(buildstate->giststate->tempCxt);
    1475 ECB             :                 }
    1476 CBC           9 :                 gistProcessEmptyingQueue(buildstate);
    1477                 :             }
    1478 ECB             :             else
    1479 GIC           9 :                 gfbb->buffersOnLevels[i] =
    1480               9 :                     list_delete_first(gfbb->buffersOnLevels[i]);
    1481 ECB             :         }
    1482 CBC           6 :         elog(DEBUG2, "emptied all buffers at level %d", i);
    1483                 :     }
    1484               3 :     MemoryContextSwitchTo(oldCtx);
    1485 GIC           3 : }
    1486 ECB             : 
    1487                 : /*
    1488                 :  * Get the depth of the GiST index.
    1489                 :  */
    1490                 : static int
    1491 GIC           3 : gistGetMaxLevel(Relation index)
    1492                 : {
    1493 ECB             :     int         maxLevel;
    1494                 :     BlockNumber blkno;
    1495                 : 
    1496                 :     /*
    1497                 :      * Traverse down the tree, starting from the root, until we hit the leaf
    1498                 :      * level.
    1499                 :      */
    1500 GIC           3 :     maxLevel = 0;
    1501               3 :     blkno = GIST_ROOT_BLKNO;
    1502 ECB             :     while (true)
    1503 CBC           3 :     {
    1504                 :         Buffer      buffer;
    1505 ECB             :         Page        page;
    1506                 :         IndexTuple  itup;
    1507                 : 
    1508 GIC           6 :         buffer = ReadBuffer(index, blkno);
    1509                 : 
    1510 ECB             :         /*
    1511                 :          * There's no concurrent access during index build, so locking is just
    1512                 :          * pro forma.
    1513                 :          */
    1514 GIC           6 :         LockBuffer(buffer, GIST_SHARE);
    1515               6 :         page = (Page) BufferGetPage(buffer);
    1516 ECB             : 
    1517 CBC           6 :         if (GistPageIsLeaf(page))
    1518                 :         {
    1519 ECB             :             /* We hit the bottom, so we're done. */
    1520 GIC           3 :             UnlockReleaseBuffer(buffer);
    1521               3 :             break;
    1522 ECB             :         }
    1523                 : 
    1524                 :         /*
    1525                 :          * Pick the first downlink on the page, and follow it. It doesn't
    1526                 :          * matter which downlink we choose, the tree has the same depth
    1527                 :          * everywhere, so we just pick the first one.
    1528                 :          */
    1529 GIC           3 :         itup = (IndexTuple) PageGetItem(page,
    1530                 :                                         PageGetItemId(page, FirstOffsetNumber));
    1531 CBC           3 :         blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
    1532 GIC           3 :         UnlockReleaseBuffer(buffer);
    1533 ECB             : 
    1534                 :         /*
    1535                 :          * We're going down on the tree. It means that there is yet one more
    1536                 :          * level in the tree.
    1537                 :          */
    1538 GIC           3 :         maxLevel++;
    1539                 :     }
    1540 CBC           3 :     return maxLevel;
    1541                 : }
    1542 ECB             : 
    1543                 : 
    1544                 : /*
    1545                 :  * Routines for managing the parent map.
    1546                 :  *
    1547                 :  * Whenever a page is split, we need to insert the downlinks into the parent.
    1548                 :  * We need to somehow find the parent page to do that. In normal insertions,
    1549                 :  * we keep a stack of nodes visited when we descend the tree. However, in
    1550                 :  * buffering build, we can start descending the tree from any internal node,
    1551                 :  * when we empty a buffer by cascading tuples to its children. So we don't
    1552                 :  * have a full stack up to the root available at that time.
    1553                 :  *
    1554                 :  * So instead, we maintain a hash table to track the parent of every internal
    1555                 :  * page. We don't need to track the parents of leaf nodes, however. Whenever
    1556                 :  * we insert to a leaf, we've just descended down from its parent, so we know
    1557                 :  * its immediate parent already. This helps a lot to limit the memory used
    1558                 :  * by this hash table.
    1559                 :  *
    1560                 :  * Whenever an internal node is split, the parent map needs to be updated.
    1561                 :  * the parent of the new child page needs to be recorded, and also the
    1562                 :  * entries for all page whose downlinks are moved to a new page at the split
    1563                 :  * needs to be updated.
    1564                 :  *
    1565                 :  * We also update the parent map whenever we descend the tree. That might seem
    1566                 :  * unnecessary, because we maintain the map whenever a downlink is moved or
    1567                 :  * created, but it is needed because we switch to buffering mode after
    1568                 :  * creating a tree with regular index inserts. Any pages created before
    1569                 :  * switching to buffering mode will not be present in the parent map initially,
    1570                 :  * but will be added there the first time we visit them.
    1571                 :  */
    1572                 : 
    1573                 : typedef struct
    1574                 : {
    1575                 :     BlockNumber childblkno;     /* hash key */
    1576                 :     BlockNumber parentblkno;
    1577                 : } ParentMapEntry;
    1578                 : 
    1579                 : static void
    1580 GIC           3 : gistInitParentMap(GISTBuildState *buildstate)
    1581                 : {
    1582 ECB             :     HASHCTL     hashCtl;
    1583                 : 
    1584 GIC           3 :     hashCtl.keysize = sizeof(BlockNumber);
    1585               3 :     hashCtl.entrysize = sizeof(ParentMapEntry);
    1586 CBC           3 :     hashCtl.hcxt = CurrentMemoryContext;
    1587               3 :     buildstate->parentMap = hash_create("gistbuild parent map",
    1588 ECB             :                                         1024,
    1589                 :                                         &hashCtl,
    1590                 :                                         HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    1591 GIC           3 : }
    1592                 : 
    1593 ECB             : static void
    1594 GIC       17463 : gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber parent)
    1595                 : {
    1596 ECB             :     ParentMapEntry *entry;
    1597                 :     bool        found;
    1598                 : 
    1599 GIC       17463 :     entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
    1600                 :                                            &child,
    1601 ECB             :                                            HASH_ENTER,
    1602                 :                                            &found);
    1603 GIC       17463 :     entry->parentblkno = parent;
    1604           17463 : }
    1605 ECB             : 
    1606                 : /*
    1607                 :  * Scan all downlinks on a page, and memorize their parent.
    1608                 :  */
    1609                 : static void
    1610 GIC           6 : gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf)
    1611                 : {
    1612 ECB             :     OffsetNumber maxoff;
    1613                 :     OffsetNumber off;
    1614 GIC           6 :     BlockNumber parentblkno = BufferGetBlockNumber(parentbuf);
    1615               6 :     Page        page = BufferGetPage(parentbuf);
    1616 ECB             : 
    1617 CBC           6 :     Assert(!GistPageIsLeaf(page));
    1618                 : 
    1619               6 :     maxoff = PageGetMaxOffsetNumber(page);
    1620 GIC         285 :     for (off = FirstOffsetNumber; off <= maxoff; off++)
    1621 ECB             :     {
    1622 CBC         279 :         ItemId      iid = PageGetItemId(page, off);
    1623 GIC         279 :         IndexTuple  idxtuple = (IndexTuple) PageGetItem(page, iid);
    1624 CBC         279 :         BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
    1625 ECB             : 
    1626 CBC         279 :         gistMemorizeParent(buildstate, childblkno, parentblkno);
    1627                 :     }
    1628               6 : }
    1629                 : 
    1630 ECB             : static BlockNumber
    1631 GIC           6 : gistGetParent(GISTBuildState *buildstate, BlockNumber child)
    1632                 : {
    1633 ECB             :     ParentMapEntry *entry;
    1634                 :     bool        found;
    1635                 : 
    1636                 :     /* Find node buffer in hash table */
    1637 GIC           6 :     entry = (ParentMapEntry *) hash_search(buildstate->parentMap,
    1638                 :                                            &child,
    1639 ECB             :                                            HASH_FIND,
    1640                 :                                            &found);
    1641 GIC           6 :     if (!found)
    1642 UIC           0 :         elog(ERROR, "could not find parent of block %u in lookup table", child);
    1643 ECB             : 
    1644 GBC           6 :     return entry->parentblkno;
    1645                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a