LCOV - differential code coverage report
Current view: top level - src/backend/access/brin - brin.c (source / functions) Coverage Total Hit UNC UBC GBC GNC CBC DUB DCB
Current: Differential Code Coverage 16@8cea358b128 vs 17@8cea358b128 Lines: 92.8 % 838 778 11 49 31 251 496 1 15
Current Date: 2024-04-14 14:21:10 Functions: 100.0 % 39 39 1 20 18
Baseline: 16@8cea358b128 Branches: 67.8 % 404 274 23 107 21 47 206
Baseline Date: 2024-04-14 14:21:09 Line coverage date bins:
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed [..60] days: 100.0 % 1 1 1
(60,120] days: 100.0 % 15 15 15
(120,180] days: 94.8 % 250 237 11 2 232 5
(180,240] days: 100.0 % 5 5 3 2
(240..) days: 91.7 % 567 520 47 31 489
Function coverage date bins:
(60,120] days: 100.0 % 1 1 1
(120,180] days: 100.0 % 11 11 11
(240..) days: 100.0 % 27 27 1 8 18
Branch coverage date bins:
(60,120] days: 83.3 % 6 5 1 5
(120,180] days: 60.8 % 74 45 22 7 42 3
(240..) days: 69.1 % 324 224 100 21 203

 Age         Owner                    Branch data    TLA  Line data    Source code
                                  1                 :                : /*
                                  2                 :                :  * brin.c
                                  3                 :                :  *      Implementation of BRIN indexes for Postgres
                                  4                 :                :  *
                                  5                 :                :  * See src/backend/access/brin/README for details.
                                  6                 :                :  *
                                  7                 :                :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
                                  8                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                  9                 :                :  *
                                 10                 :                :  * IDENTIFICATION
                                 11                 :                :  *    src/backend/access/brin/brin.c
                                 12                 :                :  *
                                 13                 :                :  * TODO
                                 14                 :                :  *      * ScalarArrayOpExpr (amsearcharray -> SK_SEARCHARRAY)
                                 15                 :                :  */
                                 16                 :                : #include "postgres.h"
                                 17                 :                : 
                                 18                 :                : #include "access/brin.h"
                                 19                 :                : #include "access/brin_page.h"
                                 20                 :                : #include "access/brin_pageops.h"
                                 21                 :                : #include "access/brin_xlog.h"
                                 22                 :                : #include "access/relation.h"
                                 23                 :                : #include "access/reloptions.h"
                                 24                 :                : #include "access/relscan.h"
                                 25                 :                : #include "access/table.h"
                                 26                 :                : #include "access/tableam.h"
                                 27                 :                : #include "access/xloginsert.h"
                                 28                 :                : #include "catalog/index.h"
                                 29                 :                : #include "catalog/pg_am.h"
                                 30                 :                : #include "commands/vacuum.h"
                                 31                 :                : #include "miscadmin.h"
                                 32                 :                : #include "pgstat.h"
                                 33                 :                : #include "postmaster/autovacuum.h"
                                 34                 :                : #include "storage/bufmgr.h"
                                 35                 :                : #include "storage/freespace.h"
                                 36                 :                : #include "tcop/tcopprot.h"        /* pgrminclude ignore */
                                 37                 :                : #include "utils/acl.h"
                                 38                 :                : #include "utils/datum.h"
                                 39                 :                : #include "utils/fmgrprotos.h"
                                 40                 :                : #include "utils/guc.h"
                                 41                 :                : #include "utils/index_selfuncs.h"
                                 42                 :                : #include "utils/memutils.h"
                                 43                 :                : #include "utils/rel.h"
                                 44                 :                : #include "utils/tuplesort.h"
                                 45                 :                : 
                                 46                 :                : /* Magic numbers for parallel state sharing */
                                 47                 :                : #define PARALLEL_KEY_BRIN_SHARED        UINT64CONST(0xB000000000000001)
                                 48                 :                : #define PARALLEL_KEY_TUPLESORT          UINT64CONST(0xB000000000000002)
                                 49                 :                : #define PARALLEL_KEY_QUERY_TEXT         UINT64CONST(0xB000000000000003)
                                 50                 :                : #define PARALLEL_KEY_WAL_USAGE          UINT64CONST(0xB000000000000004)
                                 51                 :                : #define PARALLEL_KEY_BUFFER_USAGE       UINT64CONST(0xB000000000000005)
                                 52                 :                : 
                                 53                 :                : /*
                                 54                 :                :  * Status for index builds performed in parallel.  This is allocated in a
                                 55                 :                :  * dynamic shared memory segment.
                                 56                 :                :  */
                                 57                 :                : typedef struct BrinShared
                                 58                 :                : {
                                 59                 :                :     /*
                                 60                 :                :      * These fields are not modified during the build.  They primarily exist
                                 61                 :                :      * for the benefit of worker processes that need to create state
                                 62                 :                :      * corresponding to that used by the leader.
                                 63                 :                :      */
                                 64                 :                :     Oid         heaprelid;
                                 65                 :                :     Oid         indexrelid;
                                 66                 :                :     bool        isconcurrent;
                                 67                 :                :     BlockNumber pagesPerRange;
                                 68                 :                :     int         scantuplesortstates;
                                 69                 :                : 
                                 70                 :                :     /*
                                 71                 :                :      * workersdonecv is used to monitor the progress of workers.  All parallel
                                 72                 :                :      * participants must indicate that they are done before leader can use
                                 73                 :                :      * results built by the workers (and before leader can write the data into
                                 74                 :                :      * the index).
                                 75                 :                :      */
                                 76                 :                :     ConditionVariable workersdonecv;
                                 77                 :                : 
                                 78                 :                :     /*
                                 79                 :                :      * mutex protects all fields before heapdesc.
                                 80                 :                :      *
                                 81                 :                :      * These fields contain status information of interest to BRIN index
                                 82                 :                :      * builds that must work just the same when an index is built in parallel.
                                 83                 :                :      */
                                 84                 :                :     slock_t     mutex;
                                 85                 :                : 
                                 86                 :                :     /*
                                 87                 :                :      * Mutable state that is maintained by workers, and reported back to
                                 88                 :                :      * leader at end of the scans.
                                 89                 :                :      *
                                 90                 :                :      * nparticipantsdone is number of worker processes finished.
                                 91                 :                :      *
                                 92                 :                :      * reltuples is the total number of input heap tuples.
                                 93                 :                :      *
                                 94                 :                :      * indtuples is the total number of tuples that made it into the index.
                                 95                 :                :      */
                                 96                 :                :     int         nparticipantsdone;
                                 97                 :                :     double      reltuples;
                                 98                 :                :     double      indtuples;
                                 99                 :                : 
                                100                 :                :     /*
                                101                 :                :      * ParallelTableScanDescData data follows. Can't directly embed here, as
                                102                 :                :      * implementations of the parallel table scan desc interface might need
                                103                 :                :      * stronger alignment.
                                104                 :                :      */
                                105                 :                : } BrinShared;
                                106                 :                : 
                                107                 :                : /*
                                108                 :                :  * Return pointer to a BrinShared's parallel table scan.
                                109                 :                :  *
                                110                 :                :  * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just
                                111                 :                :  * MAXALIGN.
                                112                 :                :  */
                                113                 :                : #define ParallelTableScanFromBrinShared(shared) \
                                114                 :                :     (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(BrinShared)))
                                115                 :                : 
                                116                 :                : /*
                                117                 :                :  * Status for leader in parallel index build.
                                118                 :                :  */
                                119                 :                : typedef struct BrinLeader
                                120                 :                : {
                                121                 :                :     /* parallel context itself */
                                122                 :                :     ParallelContext *pcxt;
                                123                 :                : 
                                124                 :                :     /*
                                125                 :                :      * nparticipanttuplesorts is the exact number of worker processes
                                126                 :                :      * successfully launched, plus one leader process if it participates as a
                                127                 :                :      * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader
                                128                 :                :      * participating as a worker).
                                129                 :                :      */
                                130                 :                :     int         nparticipanttuplesorts;
                                131                 :                : 
                                132                 :                :     /*
                                133                 :                :      * Leader process convenience pointers to shared state (leader avoids TOC
                                134                 :                :      * lookups).
                                135                 :                :      *
                                136                 :                :      * brinshared is the shared state for entire build.  sharedsort is the
                                137                 :                :      * shared, tuplesort-managed state passed to each process tuplesort.
                                138                 :                :      * snapshot is the snapshot used by the scan iff an MVCC snapshot is
                                139                 :                :      * required.
                                140                 :                :      */
                                141                 :                :     BrinShared *brinshared;
                                142                 :                :     Sharedsort *sharedsort;
                                143                 :                :     Snapshot    snapshot;
                                144                 :                :     WalUsage   *walusage;
                                145                 :                :     BufferUsage *bufferusage;
                                146                 :                : } BrinLeader;
                                147                 :                : 
                                148                 :                : /*
                                149                 :                :  * We use a BrinBuildState during initial construction of a BRIN index.
                                150                 :                :  * The running state is kept in a BrinMemTuple.
                                151                 :                :  */
                                152                 :                : typedef struct BrinBuildState
                                153                 :                : {
                                154                 :                :     Relation    bs_irel;
                                155                 :                :     double      bs_numtuples;
                                156                 :                :     double      bs_reltuples;
                                157                 :                :     Buffer      bs_currentInsertBuf;
                                158                 :                :     BlockNumber bs_pagesPerRange;
                                159                 :                :     BlockNumber bs_currRangeStart;
                                160                 :                :     BlockNumber bs_maxRangeStart;
                                161                 :                :     BrinRevmap *bs_rmAccess;
                                162                 :                :     BrinDesc   *bs_bdesc;
                                163                 :                :     BrinMemTuple *bs_dtuple;
                                164                 :                : 
                                165                 :                :     BrinTuple  *bs_emptyTuple;
                                166                 :                :     Size        bs_emptyTupleLen;
                                167                 :                :     MemoryContext bs_context;
                                168                 :                : 
                                169                 :                :     /*
                                170                 :                :      * bs_leader is only present when a parallel index build is performed, and
                                171                 :                :      * only in the leader process. (Actually, only the leader process has a
                                172                 :                :      * BrinBuildState.)
                                173                 :                :      */
                                174                 :                :     BrinLeader *bs_leader;
                                175                 :                :     int         bs_worker_id;
                                176                 :                : 
                                177                 :                :     /*
                                178                 :                :      * The sortstate is used by workers (including the leader). It has to be
                                179                 :                :      * part of the build state, because that's the only thing passed to the
                                180                 :                :      * build callback etc.
                                181                 :                :      */
                                182                 :                :     Tuplesortstate *bs_sortstate;
                                183                 :                : } BrinBuildState;
                                184                 :                : 
                                185                 :                : /*
                                186                 :                :  * We use a BrinInsertState to capture running state spanning multiple
                                187                 :                :  * brininsert invocations, within the same command.
                                188                 :                :  */
                                189                 :                : typedef struct BrinInsertState
                                190                 :                : {
                                191                 :                :     BrinRevmap *bis_rmAccess;
                                192                 :                :     BrinDesc   *bis_desc;
                                193                 :                :     BlockNumber bis_pages_per_range;
                                194                 :                : } BrinInsertState;
                                195                 :                : 
                                196                 :                : /*
                                197                 :                :  * Struct used as "opaque" during index scans
                                198                 :                :  */
                                199                 :                : typedef struct BrinOpaque
                                200                 :                : {
                                201                 :                :     BlockNumber bo_pagesPerRange;
                                202                 :                :     BrinRevmap *bo_rmAccess;
                                203                 :                :     BrinDesc   *bo_bdesc;
                                204                 :                : } BrinOpaque;
                                205                 :                : 
                                206                 :                : #define BRIN_ALL_BLOCKRANGES    InvalidBlockNumber
                                207                 :                : 
                                208                 :                : static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
                                209                 :                :                                                   BrinRevmap *revmap,
                                210                 :                :                                                   BlockNumber pagesPerRange,
                                211                 :                :                                                   BlockNumber tablePages);
                                212                 :                : static BrinInsertState *initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo);
                                213                 :                : static void terminate_brin_buildstate(BrinBuildState *state);
                                214                 :                : static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
                                215                 :                :                           bool include_partial, double *numSummarized, double *numExisting);
                                216                 :                : static void form_and_insert_tuple(BrinBuildState *state);
                                217                 :                : static void form_and_spill_tuple(BrinBuildState *state);
                                218                 :                : static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
                                219                 :                :                          BrinTuple *b);
                                220                 :                : static void brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy);
                                221                 :                : static bool add_values_to_range(Relation idxRel, BrinDesc *bdesc,
                                222                 :                :                                 BrinMemTuple *dtup, const Datum *values, const bool *nulls);
                                223                 :                : static bool check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys);
                                224                 :                : static void brin_fill_empty_ranges(BrinBuildState *state,
                                225                 :                :                                    BlockNumber prevRange, BlockNumber maxRange);
                                226                 :                : 
                                227                 :                : /* parallel index builds */
                                228                 :                : static void _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index,
                                229                 :                :                                  bool isconcurrent, int request);
                                230                 :                : static void _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state);
                                231                 :                : static Size _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot);
                                232                 :                : static void _brin_leader_participate_as_worker(BrinBuildState *buildstate,
                                233                 :                :                                                Relation heap, Relation index);
                                234                 :                : static void _brin_parallel_scan_and_build(BrinBuildState *buildstate,
                                235                 :                :                                           BrinShared *brinshared,
                                236                 :                :                                           Sharedsort *sharedsort,
                                237                 :                :                                           Relation heap, Relation index,
                                238                 :                :                                           int sortmem, bool progress);
                                239                 :                : 
                                240                 :                : /*
                                241                 :                :  * BRIN handler function: return IndexAmRoutine with access method parameters
                                242                 :                :  * and callbacks.
                                243                 :                :  */
                                244                 :                : Datum
 3010 tgl@sss.pgh.pa.us         245                 :CBC        1126 : brinhandler(PG_FUNCTION_ARGS)
                                246                 :                : {
                                247                 :           1126 :     IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
                                248                 :                : 
                                249                 :           1126 :     amroutine->amstrategies = 0;
                                250                 :           1126 :     amroutine->amsupport = BRIN_LAST_OPTIONAL_PROCNUM;
 1476 akorotkov@postgresql      251                 :           1126 :     amroutine->amoptsprocnum = BRIN_PROCNUM_OPTIONS;
 3010 tgl@sss.pgh.pa.us         252                 :           1126 :     amroutine->amcanorder = false;
                                253                 :           1126 :     amroutine->amcanorderbyop = false;
                                254                 :           1126 :     amroutine->amcanbackward = false;
                                255                 :           1126 :     amroutine->amcanunique = false;
                                256                 :           1126 :     amroutine->amcanmulticol = true;
                                257                 :           1126 :     amroutine->amoptionalkey = true;
                                258                 :           1126 :     amroutine->amsearcharray = false;
                                259                 :           1126 :     amroutine->amsearchnulls = true;
                                260                 :           1126 :     amroutine->amstorage = true;
                                261                 :           1126 :     amroutine->amclusterable = false;
                                262                 :           1126 :     amroutine->ampredlocks = false;
 2615 rhaas@postgresql.org      263                 :           1126 :     amroutine->amcanparallel = false;
  128 tomas.vondra@postgre      264                 :GNC        1126 :     amroutine->amcanbuildparallel = true;
 2199 teodor@sigaev.ru          265                 :CBC        1126 :     amroutine->amcaninclude = false;
 1551 akapila@postgresql.o      266                 :           1126 :     amroutine->amusemaintenanceworkmem = false;
  391 tomas.vondra@postgre      267                 :           1126 :     amroutine->amsummarizing = true;
 1551 akapila@postgresql.o      268                 :           1126 :     amroutine->amparallelvacuumoptions =
                                269                 :                :         VACUUM_OPTION_PARALLEL_CLEANUP;
 3010 tgl@sss.pgh.pa.us         270                 :           1126 :     amroutine->amkeytype = InvalidOid;
                                271                 :                : 
                                272                 :           1126 :     amroutine->ambuild = brinbuild;
                                273                 :           1126 :     amroutine->ambuildempty = brinbuildempty;
                                274                 :           1126 :     amroutine->aminsert = brininsert;
  141 tomas.vondra@postgre      275                 :GNC        1126 :     amroutine->aminsertcleanup = brininsertcleanup;
 3010 tgl@sss.pgh.pa.us         276                 :CBC        1126 :     amroutine->ambulkdelete = brinbulkdelete;
                                277                 :           1126 :     amroutine->amvacuumcleanup = brinvacuumcleanup;
                                278                 :           1126 :     amroutine->amcanreturn = NULL;
                                279                 :           1126 :     amroutine->amcostestimate = brincostestimate;
                                280                 :           1126 :     amroutine->amoptions = brinoptions;
 2801                           281                 :           1126 :     amroutine->amproperty = NULL;
 1839 alvherre@alvh.no-ip.      282                 :           1126 :     amroutine->ambuildphasename = NULL;
 3010 tgl@sss.pgh.pa.us         283                 :           1126 :     amroutine->amvalidate = brinvalidate;
 1352                           284                 :           1126 :     amroutine->amadjustmembers = NULL;
 3010                           285                 :           1126 :     amroutine->ambeginscan = brinbeginscan;
                                286                 :           1126 :     amroutine->amrescan = brinrescan;
                                287                 :           1126 :     amroutine->amgettuple = NULL;
                                288                 :           1126 :     amroutine->amgetbitmap = bringetbitmap;
                                289                 :           1126 :     amroutine->amendscan = brinendscan;
                                290                 :           1126 :     amroutine->ammarkpos = NULL;
                                291                 :           1126 :     amroutine->amrestrpos = NULL;
 2637 rhaas@postgresql.org      292                 :           1126 :     amroutine->amestimateparallelscan = NULL;
                                293                 :           1126 :     amroutine->aminitparallelscan = NULL;
                                294                 :           1126 :     amroutine->amparallelrescan = NULL;
                                295                 :                : 
 3010 tgl@sss.pgh.pa.us         296                 :           1126 :     PG_RETURN_POINTER(amroutine);
                                297                 :                : }
                                298                 :                : 
                                299                 :                : /*
                                300                 :                :  * Initialize a BrinInsertState to maintain state to be used across multiple
                                301                 :                :  * tuple inserts, within the same command.
                                302                 :                :  */
                                303                 :                : static BrinInsertState *
  141 tomas.vondra@postgre      304                 :GNC         542 : initialize_brin_insertstate(Relation idxRel, IndexInfo *indexInfo)
                                305                 :                : {
                                306                 :                :     BrinInsertState *bistate;
                                307                 :                :     MemoryContext oldcxt;
                                308                 :                : 
                                309                 :            542 :     oldcxt = MemoryContextSwitchTo(indexInfo->ii_Context);
                                310                 :            542 :     bistate = palloc0(sizeof(BrinInsertState));
                                311                 :            542 :     bistate->bis_desc = brin_build_desc(idxRel);
                                312                 :            542 :     bistate->bis_rmAccess = brinRevmapInitialize(idxRel,
                                313                 :                :                                                  &bistate->bis_pages_per_range);
                                314                 :            542 :     indexInfo->ii_AmCache = bistate;
                                315                 :            542 :     MemoryContextSwitchTo(oldcxt);
                                316                 :                : 
                                317                 :            542 :     return bistate;
                                318                 :                : }
                                319                 :                : 
                                320                 :                : /*
                                321                 :                :  * A tuple in the heap is being inserted.  To keep a brin index up to date,
                                322                 :                :  * we need to obtain the relevant index tuple and compare its stored values
                                323                 :                :  * with those of the new tuple.  If the tuple values are not consistent with
                                324                 :                :  * the summary tuple, we need to update the index tuple.
                                325                 :                :  *
                                326                 :                :  * If autosummarization is enabled, check if we need to summarize the previous
                                327                 :                :  * page range.
                                328                 :                :  *
                                329                 :                :  * If the range is not currently summarized (i.e. the revmap returns NULL for
                                330                 :                :  * it), there's nothing to do for this tuple.
                                331                 :                :  */
                                332                 :                : bool
 3010 tgl@sss.pgh.pa.us         333                 :CBC       63000 : brininsert(Relation idxRel, Datum *values, bool *nulls,
                                334                 :                :            ItemPointer heaptid, Relation heapRel,
                                335                 :                :            IndexUniqueCheck checkUnique,
                                336                 :                :            bool indexUnchanged,
                                337                 :                :            IndexInfo *indexInfo)
                                338                 :                : {
                                339                 :                :     BlockNumber pagesPerRange;
                                340                 :                :     BlockNumber origHeapBlk;
                                341                 :                :     BlockNumber heapBlk;
  141 tomas.vondra@postgre      342                 :GNC       63000 :     BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache;
                                343                 :                :     BrinRevmap *revmap;
                                344                 :                :     BrinDesc   *bdesc;
 3446 alvherre@alvh.no-ip.      345                 :CBC       63000 :     Buffer      buf = InvalidBuffer;
                                346                 :          63000 :     MemoryContext tupcxt = NULL;
 2621 tgl@sss.pgh.pa.us         347                 :          63000 :     MemoryContext oldcxt = CurrentMemoryContext;
 2570 alvherre@alvh.no-ip.      348   [ +  -  -  +  :          63000 :     bool        autosummarize = BrinGetAutoSummarize(idxRel);
                                              +  + ]
                                349                 :                : 
                                350                 :                :     /*
                                351                 :                :      * If first time through in this statement, initialize the insert state
                                352                 :                :      * that we keep for all the inserts in the command.
                                353                 :                :      */
  141 tomas.vondra@postgre      354         [ +  + ]:GNC       63000 :     if (!bistate)
                                355                 :            542 :         bistate = initialize_brin_insertstate(idxRel, indexInfo);
                                356                 :                : 
                                357                 :          63000 :     revmap = bistate->bis_rmAccess;
                                358                 :          63000 :     bdesc = bistate->bis_desc;
                                359                 :          63000 :     pagesPerRange = bistate->bis_pages_per_range;
                                360                 :                : 
                                361                 :                :     /*
                                362                 :                :      * origHeapBlk is the block number where the insertion occurred.  heapBlk
                                363                 :                :      * is the first block in the corresponding page range.
                                364                 :                :      */
 2570 alvherre@alvh.no-ip.      365                 :CBC       63000 :     origHeapBlk = ItemPointerGetBlockNumber(heaptid);
                                366                 :          63000 :     heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange;
                                367                 :                : 
                                368                 :                :     for (;;)
 3446 alvherre@alvh.no-ip.      369                 :UBC           0 :     {
 3446 alvherre@alvh.no-ip.      370                 :CBC       63000 :         bool        need_insert = false;
                                371                 :                :         OffsetNumber off;
                                372                 :                :         BrinTuple  *brtup;
                                373                 :                :         BrinMemTuple *dtup;
                                374                 :                : 
                                375         [ -  + ]:          63000 :         CHECK_FOR_INTERRUPTS();
                                376                 :                : 
                                377                 :                :         /*
                                378                 :                :          * If auto-summarization is enabled and we just inserted the first
                                379                 :                :          * tuple into the first block of a new non-first page range, request a
                                380                 :                :          * summarization run of the previous range.
                                381                 :                :          */
 2570                           382   [ +  +  +  + ]:          63000 :         if (autosummarize &&
                                383         [ +  - ]:             78 :             heapBlk > 0 &&
                                384         [ +  + ]:             78 :             heapBlk == origHeapBlk &&
                                385                 :             78 :             ItemPointerGetOffsetNumber(heaptid) == FirstOffsetNumber)
                                386                 :                :         {
                                387                 :              4 :             BlockNumber lastPageRange = heapBlk - 1;
                                388                 :                :             BrinTuple  *lastPageTuple;
                                389                 :                : 
                                390                 :                :             lastPageTuple =
                                391                 :              4 :                 brinGetTupleForHeapBlock(revmap, lastPageRange, &buf, &off,
                                392                 :                :                                          NULL, BUFFER_LOCK_SHARE);
                                393         [ +  + ]:              4 :             if (!lastPageTuple)
                                394                 :                :             {
                                395                 :                :                 bool        recorded;
                                396                 :                : 
 2223                           397                 :              3 :                 recorded = AutoVacuumRequestWork(AVW_BRINSummarizeRange,
                                398                 :                :                                                  RelationGetRelid(idxRel),
                                399                 :                :                                                  lastPageRange);
                                400         [ -  + ]:              3 :                 if (!recorded)
 2223 alvherre@alvh.no-ip.      401         [ #  # ]:UBC           0 :                     ereport(LOG,
                                402                 :                :                             (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                403                 :                :                              errmsg("request for BRIN range summarization for index \"%s\" page %u was not recorded",
                                404                 :                :                                     RelationGetRelationName(idxRel),
                                405                 :                :                                     lastPageRange)));
                                406                 :                :             }
                                407                 :                :             else
 2511 alvherre@alvh.no-ip.      408                 :CBC           1 :                 LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                                409                 :                :         }
                                410                 :                : 
 2570                           411                 :          63000 :         brtup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off,
                                412                 :                :                                          NULL, BUFFER_LOCK_SHARE);
                                413                 :                : 
                                414                 :                :         /* if range is unsummarized, there's nothing to do */
 3446                           415         [ +  + ]:          63000 :         if (!brtup)
                                416                 :          39060 :             break;
                                417                 :                : 
                                418                 :                :         /* First time through in this brininsert call? */
 2621 tgl@sss.pgh.pa.us         419         [ +  - ]:          23940 :         if (tupcxt == NULL)
                                420                 :                :         {
 3446 alvherre@alvh.no-ip.      421                 :          23940 :             tupcxt = AllocSetContextCreate(CurrentMemoryContext,
                                422                 :                :                                            "brininsert cxt",
                                423                 :                :                                            ALLOCSET_DEFAULT_SIZES);
 2621 tgl@sss.pgh.pa.us         424                 :          23940 :             MemoryContextSwitchTo(tupcxt);
                                425                 :                :         }
                                426                 :                : 
 2564 alvherre@alvh.no-ip.      427                 :          23940 :         dtup = brin_deform_tuple(bdesc, brtup, NULL);
                                428                 :                : 
 1118 tomas.vondra@postgre      429                 :          23940 :         need_insert = add_values_to_range(idxRel, bdesc, dtup, values, nulls);
                                430                 :                : 
 3446 alvherre@alvh.no-ip.      431         [ +  + ]:          23940 :         if (!need_insert)
                                432                 :                :         {
                                433                 :                :             /*
                                434                 :                :              * The tuple is consistent with the new values, so there's nothing
                                435                 :                :              * to do.
                                436                 :                :              */
                                437                 :          11986 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                                438                 :                :         }
                                439                 :                :         else
                                440                 :                :         {
 2916 kgrittn@postgresql.o      441                 :          11954 :             Page        page = BufferGetPage(buf);
 3446 alvherre@alvh.no-ip.      442                 :          11954 :             ItemId      lp = PageGetItemId(page, off);
                                443                 :                :             Size        origsz;
                                444                 :                :             BrinTuple  *origtup;
                                445                 :                :             Size        newsz;
                                446                 :                :             BrinTuple  *newtup;
                                447                 :                :             bool        samepage;
                                448                 :                : 
                                449                 :                :             /*
                                450                 :                :              * Make a copy of the old tuple, so that we can compare it after
                                451                 :                :              * re-acquiring the lock.
                                452                 :                :              */
                                453                 :          11954 :             origsz = ItemIdGetLength(lp);
 2564                           454                 :          11954 :             origtup = brin_copy_tuple(brtup, origsz, NULL, NULL);
                                455                 :                : 
                                456                 :                :             /*
                                457                 :                :              * Before releasing the lock, check if we can attempt a same-page
                                458                 :                :              * update.  Another process could insert a tuple concurrently in
                                459                 :                :              * the same page though, so downstream we must be prepared to cope
                                460                 :                :              * if this turns out to not be possible after all.
                                461                 :                :              */
 3445                           462                 :          11954 :             newtup = brin_form_tuple(bdesc, heapBlk, dtup, &newsz);
 3446                           463                 :          11954 :             samepage = brin_can_do_samepage_update(buf, origsz, newsz);
                                464                 :          11954 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                                465                 :                : 
                                466                 :                :             /*
                                467                 :                :              * Try to update the tuple.  If this doesn't work for whatever
                                468                 :                :              * reason, we need to restart from the top; the revmap might be
                                469                 :                :              * pointing at a different tuple for this block now, so we need to
                                470                 :                :              * recompute to ensure both our new heap tuple and the other
                                471                 :                :              * inserter's are covered by the combined tuple.  It might be that
                                472                 :                :              * we don't need to update at all.
                                473                 :                :              */
                                474         [ -  + ]:          11954 :             if (!brin_doupdate(idxRel, pagesPerRange, revmap, heapBlk,
                                475                 :                :                                buf, off, origtup, origsz, newtup, newsz,
                                476                 :                :                                samepage))
                                477                 :                :             {
                                478                 :                :                 /* no luck; start over */
  151 nathan@postgresql.or      479                 :UNC           0 :                 MemoryContextReset(tupcxt);
 3446 alvherre@alvh.no-ip.      480                 :UBC           0 :                 continue;
                                481                 :                :             }
                                482                 :                :         }
                                483                 :                : 
                                484                 :                :         /* success! */
 3446 alvherre@alvh.no-ip.      485                 :CBC       23940 :         break;
                                486                 :                :     }
                                487                 :                : 
                                488         [ +  + ]:          63000 :     if (BufferIsValid(buf))
                                489                 :          23941 :         ReleaseBuffer(buf);
 2621 tgl@sss.pgh.pa.us         490                 :          63000 :     MemoryContextSwitchTo(oldcxt);
                                491         [ +  + ]:          63000 :     if (tupcxt != NULL)
 3446 alvherre@alvh.no-ip.      492                 :          23940 :         MemoryContextDelete(tupcxt);
                                493                 :                : 
 3010 tgl@sss.pgh.pa.us         494                 :          63000 :     return false;
                                495                 :                : }
                                496                 :                : 
                                497                 :                : /*
                                498                 :                :  * Callback to clean up the BrinInsertState once all tuple inserts are done.
                                499                 :                :  */
                                500                 :                : void
  141 tomas.vondra@postgre      501                 :GNC         542 : brininsertcleanup(IndexInfo *indexInfo)
                                502                 :                : {
                                503                 :            542 :     BrinInsertState *bistate = (BrinInsertState *) indexInfo->ii_AmCache;
                                504                 :                : 
                                505         [ -  + ]:            542 :     Assert(bistate);
                                506                 :                : 
                                507                 :                :     /*
                                508                 :                :      * Clean up the revmap. Note that the brinDesc has already been cleaned up
                                509                 :                :      * as part of its own memory context.
                                510                 :                :      */
                                511                 :            542 :     brinRevmapTerminate(bistate->bis_rmAccess);
                                512                 :            542 :     bistate->bis_rmAccess = NULL;
                                513                 :            542 :     bistate->bis_desc = NULL;
                                514                 :            542 : }
                                515                 :                : 
                                516                 :                : /*
                                517                 :                :  * Initialize state for a BRIN index scan.
                                518                 :                :  *
                                519                 :                :  * We read the metapage here to determine the pages-per-range number that this
                                520                 :                :  * index was built with.  Note that since this cannot be changed while we're
                                521                 :                :  * holding lock on index, it's not necessary to recompute it during brinrescan.
                                522                 :                :  */
                                523                 :                : IndexScanDesc
 3010 tgl@sss.pgh.pa.us         524                 :CBC        1473 : brinbeginscan(Relation r, int nkeys, int norderbys)
                                525                 :                : {
                                526                 :                :     IndexScanDesc scan;
                                527                 :                :     BrinOpaque *opaque;
                                528                 :                : 
 3446 alvherre@alvh.no-ip.      529                 :           1473 :     scan = RelationGetIndexScan(r, nkeys, norderbys);
                                530                 :                : 
  580 peter@eisentraut.org      531                 :           1473 :     opaque = palloc_object(BrinOpaque);
  219 tmunro@postgresql.or      532                 :GNC        1473 :     opaque->bo_rmAccess = brinRevmapInitialize(r, &opaque->bo_pagesPerRange);
 3446 alvherre@alvh.no-ip.      533                 :CBC        1473 :     opaque->bo_bdesc = brin_build_desc(r);
                                534                 :           1473 :     scan->opaque = opaque;
                                535                 :                : 
 3010 tgl@sss.pgh.pa.us         536                 :           1473 :     return scan;
                                537                 :                : }
                                538                 :                : 
                                539                 :                : /*
                                540                 :                :  * Execute the index scan.
                                541                 :                :  *
                                542                 :                :  * This works by reading index TIDs from the revmap, and obtaining the index
                                543                 :                :  * tuples pointed to by them; the summary values in the index tuples are
                                544                 :                :  * compared to the scan keys.  We return into the TID bitmap all the pages in
                                545                 :                :  * ranges corresponding to index tuples that match the scan keys.
                                546                 :                :  *
                                547                 :                :  * If a TID from the revmap is read as InvalidTID, we know that range is
                                548                 :                :  * unsummarized.  Pages in those ranges need to be returned regardless of scan
                                549                 :                :  * keys.
                                550                 :                :  */
                                551                 :                : int64
                                552                 :           1473 : bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
                                553                 :                : {
 3446 alvherre@alvh.no-ip.      554                 :           1473 :     Relation    idxRel = scan->indexRelation;
                                555                 :           1473 :     Buffer      buf = InvalidBuffer;
                                556                 :                :     BrinDesc   *bdesc;
                                557                 :                :     Oid         heapOid;
                                558                 :                :     Relation    heapRel;
                                559                 :                :     BrinOpaque *opaque;
                                560                 :                :     BlockNumber nblocks;
                                561                 :                :     BlockNumber heapBlk;
                                562                 :           1473 :     int         totalpages = 0;
                                563                 :                :     FmgrInfo   *consistentFn;
                                564                 :                :     MemoryContext oldcxt;
                                565                 :                :     MemoryContext perRangeCxt;
                                566                 :                :     BrinMemTuple *dtup;
 2524 bruce@momjian.us          567                 :           1473 :     BrinTuple  *btup = NULL;
 2564 alvherre@alvh.no-ip.      568                 :           1473 :     Size        btupsz = 0;
                                569                 :                :     ScanKey   **keys,
                                570                 :                :               **nullkeys;
                                571                 :                :     int        *nkeys,
                                572                 :                :                *nnullkeys;
                                573                 :                :     char       *ptr;
                                574                 :                :     Size        len;
                                575                 :                :     char       *tmp PG_USED_FOR_ASSERTS_ONLY;
                                576                 :                : 
 3446                           577                 :           1473 :     opaque = (BrinOpaque *) scan->opaque;
                                578                 :           1473 :     bdesc = opaque->bo_bdesc;
                                579   [ -  +  -  -  :           1473 :     pgstat_count_index_scan(idxRel);
                                              +  - ]
                                580                 :                : 
                                581                 :                :     /*
                                582                 :                :      * We need to know the size of the table so that we know how long to
                                583                 :                :      * iterate on the revmap.
                                584                 :                :      */
                                585                 :           1473 :     heapOid = IndexGetRelation(RelationGetRelid(idxRel), false);
 1910 andres@anarazel.de        586                 :           1473 :     heapRel = table_open(heapOid, AccessShareLock);
 3446 alvherre@alvh.no-ip.      587                 :           1473 :     nblocks = RelationGetNumberOfBlocks(heapRel);
 1910 andres@anarazel.de        588                 :           1473 :     table_close(heapRel, AccessShareLock);
                                589                 :                : 
                                590                 :                :     /*
                                591                 :                :      * Make room for the consistent support procedures of indexed columns.  We
                                592                 :                :      * don't look them up here; we do that lazily the first time we see a scan
                                593                 :                :      * key reference each of them.  We rely on zeroing fn_oid to InvalidOid.
                                594                 :                :      */
  580 peter@eisentraut.org      595                 :           1473 :     consistentFn = palloc0_array(FmgrInfo, bdesc->bd_tupdesc->natts);
                                596                 :                : 
                                597                 :                :     /*
                                598                 :                :      * Make room for per-attribute lists of scan keys that we'll pass to the
                                599                 :                :      * consistent support procedure. We don't know which attributes have scan
                                600                 :                :      * keys, so we allocate space for all attributes. That may use more memory
                                601                 :                :      * but it's probably cheaper than determining which attributes are used.
                                602                 :                :      *
                                603                 :                :      * We keep null and regular keys separate, so that we can pass just the
                                604                 :                :      * regular keys to the consistent function easily.
                                605                 :                :      *
                                606                 :                :      * To reduce the allocation overhead, we allocate one big chunk and then
                                607                 :                :      * carve it into smaller arrays ourselves. All the pieces have exactly the
                                608                 :                :      * same lifetime, so that's OK.
                                609                 :                :      *
                                610                 :                :      * XXX The widest index can have 32 attributes, so the amount of wasted
                                611                 :                :      * memory is negligible. We could invent a more compact approach (with
                                612                 :                :      * just space for used attributes) but that would make the matching more
                                613                 :                :      * complex so it's not a good trade-off.
                                614                 :                :      */
 1118 tomas.vondra@postgre      615                 :           1473 :     len =
                                616                 :           1473 :         MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) +  /* regular keys */
                                617                 :           1473 :         MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
                                618                 :           1473 :         MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts) +
                                619                 :           1473 :         MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts) +  /* NULL keys */
                                620                 :           1473 :         MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys) * bdesc->bd_tupdesc->natts +
                                621                 :           1473 :         MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
                                622                 :                : 
                                623                 :           1473 :     ptr = palloc(len);
                                624                 :           1473 :     tmp = ptr;
                                625                 :                : 
                                626                 :           1473 :     keys = (ScanKey **) ptr;
                                627                 :           1473 :     ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
                                628                 :                : 
                                629                 :           1473 :     nullkeys = (ScanKey **) ptr;
                                630                 :           1473 :     ptr += MAXALIGN(sizeof(ScanKey *) * bdesc->bd_tupdesc->natts);
                                631                 :                : 
                                632                 :           1473 :     nkeys = (int *) ptr;
                                633                 :           1473 :     ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
                                634                 :                : 
                                635                 :           1473 :     nnullkeys = (int *) ptr;
                                636                 :           1473 :     ptr += MAXALIGN(sizeof(int) * bdesc->bd_tupdesc->natts);
                                637                 :                : 
                                638         [ +  + ]:          34989 :     for (int i = 0; i < bdesc->bd_tupdesc->natts; i++)
                                639                 :                :     {
                                640                 :          33516 :         keys[i] = (ScanKey *) ptr;
                                641                 :          33516 :         ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
                                642                 :                : 
                                643                 :          33516 :         nullkeys[i] = (ScanKey *) ptr;
                                644                 :          33516 :         ptr += MAXALIGN(sizeof(ScanKey) * scan->numberOfKeys);
                                645                 :                :     }
                                646                 :                : 
                                647         [ -  + ]:           1473 :     Assert(tmp + len == ptr);
                                648                 :                : 
                                649                 :                :     /* zero the number of keys */
                                650                 :           1473 :     memset(nkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
                                651                 :           1473 :     memset(nnullkeys, 0, sizeof(int) * bdesc->bd_tupdesc->natts);
                                652                 :                : 
                                653                 :                :     /* Preprocess the scan keys - split them into per-attribute arrays. */
  599 drowley@postgresql.o      654         [ +  + ]:           2946 :     for (int keyno = 0; keyno < scan->numberOfKeys; keyno++)
                                655                 :                :     {
 1118 tomas.vondra@postgre      656                 :           1473 :         ScanKey     key = &scan->keyData[keyno];
                                657                 :           1473 :         AttrNumber  keyattno = key->sk_attno;
                                658                 :                : 
                                659                 :                :         /*
                                660                 :                :          * The collation of the scan key must match the collation used in the
                                661                 :                :          * index column (but only if the search is not IS NULL/ IS NOT NULL).
                                662                 :                :          * Otherwise we shouldn't be using this index ...
                                663                 :                :          */
                                664   [ +  +  -  + ]:           1473 :         Assert((key->sk_flags & SK_ISNULL) ||
                                665                 :                :                (key->sk_collation ==
                                666                 :                :                 TupleDescAttr(bdesc->bd_tupdesc,
                                667                 :                :                               keyattno - 1)->attcollation));
                                668                 :                : 
                                669                 :                :         /*
                                670                 :                :          * First time we see this index attribute, so init as needed.
                                671                 :                :          *
                                672                 :                :          * This is a bit of an overkill - we don't know how many scan keys are
                                673                 :                :          * there for this attribute, so we simply allocate the largest number
                                674                 :                :          * possible (as if all keys were for this attribute). This may waste a
                                675                 :                :          * bit of memory, but we only expect a small number of scan keys in
                                676                 :                :          * general, so this should be negligible, and repeated repalloc calls
                                677                 :                :          * are not free either.
                                678                 :                :          */
                                679         [ +  - ]:           1473 :         if (consistentFn[keyattno - 1].fn_oid == InvalidOid)
                                680                 :                :         {
                                681                 :                :             FmgrInfo   *tmp;
                                682                 :                : 
                                683                 :                :             /* First time we see this attribute, so no key/null keys. */
                                684         [ -  + ]:           1473 :             Assert(nkeys[keyattno - 1] == 0);
                                685         [ -  + ]:           1473 :             Assert(nnullkeys[keyattno - 1] == 0);
                                686                 :                : 
                                687                 :           1473 :             tmp = index_getprocinfo(idxRel, keyattno,
                                688                 :                :                                     BRIN_PROCNUM_CONSISTENT);
                                689                 :           1473 :             fmgr_info_copy(&consistentFn[keyattno - 1], tmp,
                                690                 :                :                            CurrentMemoryContext);
                                691                 :                :         }
                                692                 :                : 
                                693                 :                :         /* Add key to the proper per-attribute array. */
                                694         [ +  + ]:           1473 :         if (key->sk_flags & SK_ISNULL)
                                695                 :                :         {
                                696                 :             18 :             nullkeys[keyattno - 1][nnullkeys[keyattno - 1]] = key;
                                697                 :             18 :             nnullkeys[keyattno - 1]++;
                                698                 :                :         }
                                699                 :                :         else
                                700                 :                :         {
                                701                 :           1455 :             keys[keyattno - 1][nkeys[keyattno - 1]] = key;
                                702                 :           1455 :             nkeys[keyattno - 1]++;
                                703                 :                :         }
                                704                 :                :     }
                                705                 :                : 
                                706                 :                :     /* allocate an initial in-memory tuple, outside of the per-range memcxt */
 2564 alvherre@alvh.no-ip.      707                 :           1473 :     dtup = brin_new_memtuple(bdesc);
                                708                 :                : 
                                709                 :                :     /*
                                710                 :                :      * Setup and use a per-range memory context, which is reset every time we
                                711                 :                :      * loop below.  This avoids having to free the tuples within the loop.
                                712                 :                :      */
 3446                           713                 :           1473 :     perRangeCxt = AllocSetContextCreate(CurrentMemoryContext,
                                714                 :                :                                         "bringetbitmap cxt",
                                715                 :                :                                         ALLOCSET_DEFAULT_SIZES);
                                716                 :           1473 :     oldcxt = MemoryContextSwitchTo(perRangeCxt);
                                717                 :                : 
                                718                 :                :     /*
                                719                 :                :      * Now scan the revmap.  We start by querying for heap page 0,
                                720                 :                :      * incrementing by the number of pages per range; this gives us a full
                                721                 :                :      * view of the table.
                                722                 :                :      */
                                723         [ +  + ]:          97299 :     for (heapBlk = 0; heapBlk < nblocks; heapBlk += opaque->bo_pagesPerRange)
                                724                 :                :     {
                                725                 :                :         bool        addrange;
 2564                           726                 :          95826 :         bool        gottuple = false;
                                727                 :                :         BrinTuple  *tup;
                                728                 :                :         OffsetNumber off;
                                729                 :                :         Size        size;
                                730                 :                : 
 3446                           731         [ -  + ]:          95826 :         CHECK_FOR_INTERRUPTS();
                                732                 :                : 
  151 nathan@postgresql.or      733                 :GNC       95826 :         MemoryContextReset(perRangeCxt);
                                734                 :                : 
 3446 alvherre@alvh.no-ip.      735                 :CBC       95826 :         tup = brinGetTupleForHeapBlock(opaque->bo_rmAccess, heapBlk, &buf,
                                736                 :                :                                        &off, &size, BUFFER_LOCK_SHARE);
                                737         [ +  + ]:          95826 :         if (tup)
                                738                 :                :         {
 2564                           739                 :          94968 :             gottuple = true;
                                740                 :          94968 :             btup = brin_copy_tuple(tup, size, btup, &btupsz);
 3446                           741                 :          94968 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                                742                 :                :         }
                                743                 :                : 
                                744                 :                :         /*
                                745                 :                :          * For page ranges with no indexed tuple, we must return the whole
                                746                 :                :          * range; otherwise, compare it to the scan keys.
                                747                 :                :          */
 2564                           748         [ +  + ]:          95826 :         if (!gottuple)
                                749                 :                :         {
 3446 alvherre@alvh.no-ip.      750                 :GBC         858 :             addrange = true;
                                751                 :                :         }
                                752                 :                :         else
                                753                 :                :         {
 2564 alvherre@alvh.no-ip.      754                 :CBC       94968 :             dtup = brin_deform_tuple(bdesc, btup, dtup);
 3446                           755         [ -  + ]:          94968 :             if (dtup->bt_placeholder)
                                756                 :                :             {
                                757                 :                :                 /*
                                758                 :                :                  * Placeholder tuples are always returned, regardless of the
                                759                 :                :                  * values stored in them.
                                760                 :                :                  */
 3446 alvherre@alvh.no-ip.      761                 :UBC           0 :                 addrange = true;
                                762                 :                :             }
                                763                 :                :             else
                                764                 :                :             {
                                765                 :                :                 int         attno;
                                766                 :                : 
                                767                 :                :                 /*
                                768                 :                :                  * Compare scan keys with summary values stored for the range.
                                769                 :                :                  * If scan keys are matched, the page range must be added to
                                770                 :                :                  * the bitmap.  We initially assume the range needs to be
                                771                 :                :                  * added; in particular this serves the case where there are
                                772                 :                :                  * no keys.
                                773                 :                :                  */
 3446 alvherre@alvh.no-ip.      774                 :CBC       94968 :                 addrange = true;
 1118 tomas.vondra@postgre      775         [ +  + ]:        2352034 :                 for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
                                776                 :                :                 {
                                777                 :                :                     BrinValues *bval;
                                778                 :                :                     Datum       add;
                                779                 :                :                     Oid         collation;
                                780                 :                : 
                                781                 :                :                     /*
                                782                 :                :                      * skip attributes without any scan keys (both regular and
                                783                 :                :                      * IS [NOT] NULL)
                                784                 :                :                      */
                                785   [ +  +  +  + ]:        2283867 :                     if (nkeys[attno - 1] == 0 && nnullkeys[attno - 1] == 0)
                                786                 :        2188899 :                         continue;
                                787                 :                : 
                                788                 :          94968 :                     bval = &dtup->bt_columns[attno - 1];
                                789                 :                : 
                                790                 :                :                     /*
                                791                 :                :                      * If the BRIN tuple indicates that this range is empty,
                                792                 :                :                      * we can skip it: there's nothing to match.  We don't
                                793                 :                :                      * need to examine the next columns.
                                794                 :                :                      */
  331                           795         [ -  + ]:          94968 :                     if (dtup->bt_empty_range)
                                796                 :                :                     {
  331 tomas.vondra@postgre      797                 :UBC           0 :                         addrange = false;
                                798                 :              0 :                         break;
                                799                 :                :                     }
                                800                 :                : 
                                801                 :                :                     /*
                                802                 :                :                      * First check if there are any IS [NOT] NULL scan keys,
                                803                 :                :                      * and if we're violating them. In that case we can
                                804                 :                :                      * terminate early, without invoking the support function.
                                805                 :                :                      *
                                806                 :                :                      * As there may be more keys, we can only determine
                                807                 :                :                      * mismatch within this loop.
                                808                 :                :                      */
 1118 tomas.vondra@postgre      809         [ +  - ]:CBC       94968 :                     if (bdesc->bd_info[attno - 1]->oi_regular_nulls &&
                                810         [ +  + ]:          94968 :                         !check_null_keys(bval, nullkeys[attno - 1],
                                811                 :          94968 :                                          nnullkeys[attno - 1]))
                                812                 :                :                     {
                                813                 :                :                         /*
                                814                 :                :                          * If any of the IS [NOT] NULL keys failed, the page
                                815                 :                :                          * range as a whole can't pass. So terminate the loop.
                                816                 :                :                          */
                                817                 :            498 :                         addrange = false;
                                818                 :            498 :                         break;
                                819                 :                :                     }
                                820                 :                : 
                                821                 :                :                     /*
                                822                 :                :                      * So either there are no IS [NOT] NULL keys, or all
                                823                 :                :                      * passed. If there are no regular scan keys, we're done -
                                824                 :                :                      * the page range matches. If there are regular keys, but
                                825                 :                :                      * the page range is marked as 'all nulls' it can't
                                826                 :                :                      * possibly pass (we're assuming the operators are
                                827                 :                :                      * strict).
                                828                 :                :                      */
                                829                 :                : 
                                830                 :                :                     /* No regular scan keys - page range as a whole passes. */
                                831         [ +  + ]:          94470 :                     if (!nkeys[attno - 1])
                                832                 :            618 :                         continue;
                                833                 :                : 
                                834   [ +  -  -  + ]:          93852 :                     Assert((nkeys[attno - 1] > 0) &&
                                835                 :                :                            (nkeys[attno - 1] <= scan->numberOfKeys));
                                836                 :                : 
                                837                 :                :                     /* If it is all nulls, it cannot possibly be consistent. */
                                838         [ +  + ]:          93852 :                     if (bval->bv_allnulls)
                                839                 :                :                     {
                                840                 :            189 :                         addrange = false;
                                841                 :            189 :                         break;
                                842                 :                :                     }
                                843                 :                : 
                                844                 :                :                     /*
                                845                 :                :                      * Collation from the first key (has to be the same for
                                846                 :                :                      * all keys for the same attribute).
                                847                 :                :                      */
 1115                           848                 :          93663 :                     collation = keys[attno - 1][0]->sk_collation;
                                849                 :                : 
                                850                 :                :                     /*
                                851                 :                :                      * Check whether the scan key is consistent with the page
                                852                 :                :                      * range values; if so, have the pages in the range added
                                853                 :                :                      * to the output bitmap.
                                854                 :                :                      *
                                855                 :                :                      * The opclass may or may not support processing of
                                856                 :                :                      * multiple scan keys. We can determine that based on the
                                857                 :                :                      * number of arguments - functions with an extra parameter
                                858                 :                :                      * (number of scan keys) do support this, otherwise we
                                859                 :                :                      * have to simply pass the scan keys one by one.
                                860                 :                :                      */
                                861         [ +  + ]:          93663 :                     if (consistentFn[attno - 1].fn_nargs >= 4)
                                862                 :                :                     {
                                863                 :                :                         /* Check all keys at once */
                                864                 :          19797 :                         add = FunctionCall4Coll(&consistentFn[attno - 1],
                                865                 :                :                                                 collation,
                                866                 :                :                                                 PointerGetDatum(bdesc),
                                867                 :                :                                                 PointerGetDatum(bval),
                                868                 :          19797 :                                                 PointerGetDatum(keys[attno - 1]),
                                869                 :          19797 :                                                 Int32GetDatum(nkeys[attno - 1]));
                                870                 :          19797 :                         addrange = DatumGetBool(add);
                                871                 :                :                     }
                                872                 :                :                     else
                                873                 :                :                     {
                                874                 :                :                         /*
                                875                 :                :                          * Check keys one by one
                                876                 :                :                          *
                                877                 :                :                          * When there are multiple scan keys, failure to meet
                                878                 :                :                          * the criteria for a single one of them is enough to
                                879                 :                :                          * discard the range as a whole, so break out of the
                                880                 :                :                          * loop as soon as a false return value is obtained.
                                881                 :                :                          */
                                882                 :                :                         int         keyno;
                                883                 :                : 
                                884         [ +  + ]:         129039 :                         for (keyno = 0; keyno < nkeys[attno - 1]; keyno++)
                                885                 :                :                         {
                                886                 :          73866 :                             add = FunctionCall3Coll(&consistentFn[attno - 1],
                                887                 :          73866 :                                                     keys[attno - 1][keyno]->sk_collation,
                                888                 :                :                                                     PointerGetDatum(bdesc),
                                889                 :                :                                                     PointerGetDatum(bval),
                                890                 :          73866 :                                                     PointerGetDatum(keys[attno - 1][keyno]));
                                891                 :          73866 :                             addrange = DatumGetBool(add);
                                892         [ +  + ]:          73866 :                             if (!addrange)
                                893                 :          18693 :                                 break;
                                894                 :                :                         }
                                895                 :                :                     }
                                896                 :                : 
                                897                 :                :                     /*
                                898                 :                :                      * If we found a scan key eliminating the range, no need
                                899                 :                :                      * to check additional ones.
                                900                 :                :                      */
  420                           901         [ +  + ]:          93663 :                     if (!addrange)
                                902                 :          26114 :                         break;
                                903                 :                :                 }
                                904                 :                :             }
                                905                 :                :         }
                                906                 :                : 
                                907                 :                :         /* add the pages in the range to the output bitmap, if needed */
 3446 alvherre@alvh.no-ip.      908         [ +  + ]:          95826 :         if (addrange)
                                909                 :                :         {
                                910                 :                :             BlockNumber pageno;
                                911                 :                : 
                                912                 :          69025 :             for (pageno = heapBlk;
 1103 tomas.vondra@postgre      913         [ +  + ]:         143010 :                  pageno <= Min(nblocks, heapBlk + opaque->bo_pagesPerRange) - 1;
 3446 alvherre@alvh.no-ip.      914                 :          73985 :                  pageno++)
                                915                 :                :             {
                                916                 :          73985 :                 MemoryContextSwitchTo(oldcxt);
                                917                 :          73985 :                 tbm_add_page(tbm, pageno);
                                918                 :          73985 :                 totalpages++;
                                919                 :          73985 :                 MemoryContextSwitchTo(perRangeCxt);
                                920                 :                :             }
                                921                 :                :         }
                                922                 :                :     }
                                923                 :                : 
                                924                 :           1473 :     MemoryContextSwitchTo(oldcxt);
                                925                 :           1473 :     MemoryContextDelete(perRangeCxt);
                                926                 :                : 
                                927         [ +  - ]:           1473 :     if (buf != InvalidBuffer)
                                928                 :           1473 :         ReleaseBuffer(buf);
                                929                 :                : 
                                930                 :                :     /*
                                931                 :                :      * XXX We have an approximation of the number of *pages* that our scan
                                932                 :                :      * returns, but we don't have a precise idea of the number of heap tuples
                                933                 :                :      * involved.
                                934                 :                :      */
 3010 tgl@sss.pgh.pa.us         935                 :           1473 :     return totalpages * 10;
                                936                 :                : }
                                937                 :                : 
                                938                 :                : /*
                                939                 :                :  * Re-initialize state for a BRIN index scan
                                940                 :                :  */
                                941                 :                : void
                                942                 :           1473 : brinrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
                                943                 :                :            ScanKey orderbys, int norderbys)
                                944                 :                : {
                                945                 :                :     /*
                                946                 :                :      * Other index AMs preprocess the scan keys at this point, or sometime
                                947                 :                :      * early during the scan; this lets them optimize by removing redundant
                                948                 :                :      * keys, or doing early returns when they are impossible to satisfy; see
                                949                 :                :      * _bt_preprocess_keys for an example.  Something like that could be added
                                950                 :                :      * here someday, too.
                                951                 :                :      */
                                952                 :                : 
 3446 alvherre@alvh.no-ip.      953   [ +  -  +  - ]:           1473 :     if (scankey && scan->numberOfKeys > 0)
                                954                 :           1473 :         memmove(scan->keyData, scankey,
                                955                 :           1473 :                 scan->numberOfKeys * sizeof(ScanKeyData));
                                956                 :           1473 : }
                                957                 :                : 
                                958                 :                : /*
                                959                 :                :  * Close down a BRIN index scan
                                960                 :                :  */
                                961                 :                : void
 3010 tgl@sss.pgh.pa.us         962                 :           1473 : brinendscan(IndexScanDesc scan)
                                963                 :                : {
 3446 alvherre@alvh.no-ip.      964                 :           1473 :     BrinOpaque *opaque = (BrinOpaque *) scan->opaque;
                                965                 :                : 
                                966                 :           1473 :     brinRevmapTerminate(opaque->bo_rmAccess);
                                967                 :           1473 :     brin_free_desc(opaque->bo_bdesc);
                                968                 :           1473 :     pfree(opaque);
                                969                 :           1473 : }
                                970                 :                : 
                                971                 :                : /*
                                972                 :                :  * Per-heap-tuple callback for table_index_build_scan.
                                973                 :                :  *
                                974                 :                :  * Note we don't worry about the page range at the end of the table here; it is
                                975                 :                :  * present in the build state struct after we're called the last time, but not
                                976                 :                :  * inserted into the index.  Caller must ensure to do so, if appropriate.
                                977                 :                :  */
                                978                 :                : static void
                                979                 :         365210 : brinbuildCallback(Relation index,
                                980                 :                :                   ItemPointer tid,
                                981                 :                :                   Datum *values,
                                982                 :                :                   bool *isnull,
                                983                 :                :                   bool tupleIsAlive,
                                984                 :                :                   void *brstate)
                                985                 :                : {
                                986                 :         365210 :     BrinBuildState *state = (BrinBuildState *) brstate;
                                987                 :                :     BlockNumber thisblock;
                                988                 :                : 
 1619 andres@anarazel.de        989                 :         365210 :     thisblock = ItemPointerGetBlockNumber(tid);
                                990                 :                : 
                                991                 :                :     /*
                                992                 :                :      * If we're in a block that belongs to a future range, summarize what
                                993                 :                :      * we've got and start afresh.  Note the scan might have skipped many
                                994                 :                :      * pages, if they were devoid of live tuples; make sure to insert index
                                995                 :                :      * tuples for those too.
                                996                 :                :      */
 3446 alvherre@alvh.no-ip.      997         [ +  + ]:         366344 :     while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1)
                                998                 :                :     {
                                999                 :                : 
                               1000                 :                :         BRIN_elog((DEBUG2,
                               1001                 :                :                    "brinbuildCallback: completed a range: %u--%u",
                               1002                 :                :                    state->bs_currRangeStart,
                               1003                 :                :                    state->bs_currRangeStart + state->bs_pagesPerRange));
                               1004                 :                : 
                               1005                 :                :         /* create the index tuple and insert it */
                               1006                 :           1134 :         form_and_insert_tuple(state);
                               1007                 :                : 
                               1008                 :                :         /* set state to correspond to the next range */
                               1009                 :           1134 :         state->bs_currRangeStart += state->bs_pagesPerRange;
                               1010                 :                : 
                               1011                 :                :         /* re-initialize state for it */
                               1012                 :           1134 :         brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
                               1013                 :                :     }
                               1014                 :                : 
                               1015                 :                :     /* Accumulate the current tuple into the running state */
 1118 tomas.vondra@postgre     1016                 :         365210 :     (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
                               1017                 :                :                                values, isnull);
 3446 alvherre@alvh.no-ip.     1018                 :         365210 : }
                               1019                 :                : 
                               1020                 :                : /*
                               1021                 :                :  * Per-heap-tuple callback for table_index_build_scan with parallelism.
                               1022                 :                :  *
                               1023                 :                :  * A version of the callback used by parallel index builds. The main difference
                               1024                 :                :  * is that instead of writing the BRIN tuples into the index, we write them
                               1025                 :                :  * into a shared tuplesort, and leave the insertion up to the leader (which may
                               1026                 :                :  * reorder them a bit etc.). The callback also does not generate empty ranges,
                               1027                 :                :  * those will be added by the leader when merging results from workers.
                               1028                 :                :  */
                               1029                 :                : static void
  128 tomas.vondra@postgre     1030                 :GNC        4501 : brinbuildCallbackParallel(Relation index,
                               1031                 :                :                           ItemPointer tid,
                               1032                 :                :                           Datum *values,
                               1033                 :                :                           bool *isnull,
                               1034                 :                :                           bool tupleIsAlive,
                               1035                 :                :                           void *brstate)
                               1036                 :                : {
                               1037                 :           4501 :     BrinBuildState *state = (BrinBuildState *) brstate;
                               1038                 :                :     BlockNumber thisblock;
                               1039                 :                : 
                               1040                 :           4501 :     thisblock = ItemPointerGetBlockNumber(tid);
                               1041                 :                : 
                               1042                 :                :     /*
                               1043                 :                :      * If we're in a block that belongs to a different range, summarize what
                               1044                 :                :      * we've got and start afresh.  Note the scan might have skipped many
                               1045                 :                :      * pages, if they were devoid of live tuples; we do not create empty BRIN
                               1046                 :                :      * ranges here - the leader is responsible for filling them in.
                               1047                 :                :      *
                               1048                 :                :      * Unlike serial builds, parallel index builds allow synchronized seqscans
                               1049                 :                :      * (because that's what parallel scans do). This means the block may wrap
                               1050                 :                :      * around to the beginning of the relation, so the condition needs to
                               1051                 :                :      * check for both future and past ranges.
                               1052                 :                :      */
  106                          1053         [ +  - ]:           4501 :     if ((thisblock < state->bs_currRangeStart) ||
                               1054         [ +  + ]:           4501 :         (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1))
                               1055                 :                :     {
                               1056                 :                : 
                               1057                 :                :         BRIN_elog((DEBUG2,
                               1058                 :                :                    "brinbuildCallbackParallel: completed a range: %u--%u",
                               1059                 :                :                    state->bs_currRangeStart,
                               1060                 :                :                    state->bs_currRangeStart + state->bs_pagesPerRange));
                               1061                 :                : 
                               1062                 :                :         /* create the index tuple and write it into the tuplesort */
  128                          1063                 :             23 :         form_and_spill_tuple(state);
                               1064                 :                : 
                               1065                 :                :         /*
                               1066                 :                :          * Set state to correspond to the next range (for this block).
                               1067                 :                :          *
                               1068                 :                :          * This skips ranges that are either empty (and so we don't get any
                               1069                 :                :          * tuples to summarize), or processed by other workers. We can't
                               1070                 :                :          * differentiate those cases here easily, so we leave it up to the
                               1071                 :                :          * leader to fill empty ranges where needed.
                               1072                 :                :          */
                               1073                 :                :         state->bs_currRangeStart
                               1074                 :             23 :             = state->bs_pagesPerRange * (thisblock / state->bs_pagesPerRange);
                               1075                 :                : 
                               1076                 :                :         /* re-initialize state for it */
                               1077                 :             23 :         brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
                               1078                 :                :     }
                               1079                 :                : 
                               1080                 :                :     /* Accumulate the current tuple into the running state */
                               1081                 :           4501 :     (void) add_values_to_range(index, state->bs_bdesc, state->bs_dtuple,
                               1082                 :                :                                values, isnull);
                               1083                 :           4501 : }
                               1084                 :                : 
                               1085                 :                : /*
                               1086                 :                :  * brinbuild() -- build a new BRIN index.
                               1087                 :                :  */
                               1088                 :                : IndexBuildResult *
 3010 tgl@sss.pgh.pa.us        1089                 :CBC         166 : brinbuild(Relation heap, Relation index, IndexInfo *indexInfo)
                               1090                 :                : {
                               1091                 :                :     IndexBuildResult *result;
                               1092                 :                :     double      reltuples;
                               1093                 :                :     double      idxtuples;
                               1094                 :                :     BrinRevmap *revmap;
                               1095                 :                :     BrinBuildState *state;
                               1096                 :                :     Buffer      meta;
                               1097                 :                :     BlockNumber pagesPerRange;
                               1098                 :                : 
                               1099                 :                :     /*
                               1100                 :                :      * We expect to be called exactly once for any index relation.
                               1101                 :                :      */
 3446 alvherre@alvh.no-ip.     1102         [ -  + ]:            166 :     if (RelationGetNumberOfBlocks(index) != 0)
 3446 alvherre@alvh.no-ip.     1103         [ #  # ]:UBC           0 :         elog(ERROR, "index \"%s\" already contains data",
                               1104                 :                :              RelationGetRelationName(index));
                               1105                 :                : 
                               1106                 :                :     /*
                               1107                 :                :      * Critical section not required, because on error the creation of the
                               1108                 :                :      * whole relation will be rolled back.
                               1109                 :                :      */
                               1110                 :                : 
  235 tmunro@postgresql.or     1111                 :CBC         166 :     meta = ExtendBufferedRel(BMR_REL(index), MAIN_FORKNUM, NULL,
                               1112                 :                :                              EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
 3446 alvherre@alvh.no-ip.     1113         [ -  + ]:            166 :     Assert(BufferGetBlockNumber(meta) == BRIN_METAPAGE_BLKNO);
                               1114                 :                : 
 2916 kgrittn@postgresql.o     1115   [ +  -  -  +  :            166 :     brin_metapage_init(BufferGetPage(meta), BrinGetPagesPerRange(index),
                                              +  + ]
                               1116                 :                :                        BRIN_CURRENT_VERSION);
 3446 alvherre@alvh.no-ip.     1117                 :            166 :     MarkBufferDirty(meta);
                               1118                 :                : 
                               1119   [ +  +  +  +  :            166 :     if (RelationNeedsWAL(index))
                                        +  +  -  + ]
                               1120                 :                :     {
                               1121                 :                :         xl_brin_createidx xlrec;
                               1122                 :                :         XLogRecPtr  recptr;
                               1123                 :                :         Page        page;
                               1124                 :                : 
                               1125                 :             84 :         xlrec.version = BRIN_CURRENT_VERSION;
                               1126   [ +  -  -  +  :             84 :         xlrec.pagesPerRange = BrinGetPagesPerRange(index);
                                              +  + ]
                               1127                 :                : 
 3433 heikki.linnakangas@i     1128                 :             84 :         XLogBeginInsert();
                               1129                 :             84 :         XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
 2355 tgl@sss.pgh.pa.us        1130                 :             84 :         XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT | REGBUF_STANDARD);
                               1131                 :                : 
 3433 heikki.linnakangas@i     1132                 :             84 :         recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
                               1133                 :                : 
 2916 kgrittn@postgresql.o     1134                 :             84 :         page = BufferGetPage(meta);
 3446 alvherre@alvh.no-ip.     1135                 :             84 :         PageSetLSN(page, recptr);
                               1136                 :                :     }
                               1137                 :                : 
                               1138                 :            166 :     UnlockReleaseBuffer(meta);
                               1139                 :                : 
                               1140                 :                :     /*
                               1141                 :                :      * Initialize our state, including the deformed tuple state.
                               1142                 :                :      */
  219 tmunro@postgresql.or     1143                 :GNC         166 :     revmap = brinRevmapInitialize(index, &pagesPerRange);
  128 tomas.vondra@postgre     1144                 :            166 :     state = initialize_brin_buildstate(index, revmap, pagesPerRange,
                               1145                 :                :                                        RelationGetNumberOfBlocks(heap));
                               1146                 :                : 
                               1147                 :                :     /*
                               1148                 :                :      * Attempt to launch parallel worker scan when required
                               1149                 :                :      *
                               1150                 :                :      * XXX plan_create_index_workers makes the number of workers dependent on
                               1151                 :                :      * maintenance_work_mem, requiring 32MB for each worker. That makes sense
                               1152                 :                :      * for btree, but not for BRIN, which can do away with much less memory.
                               1153                 :                :      * So maybe make that somehow less strict, optionally?
                               1154                 :                :      */
                               1155         [ +  + ]:            166 :     if (indexInfo->ii_ParallelWorkers > 0)
                               1156                 :              2 :         _brin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent,
                               1157                 :                :                              indexInfo->ii_ParallelWorkers);
                               1158                 :                : 
                               1159                 :                :     /*
                               1160                 :                :      * If parallel build requested and at least one worker process was
                               1161                 :                :      * successfully launched, set up coordination state, wait for workers to
                               1162                 :                :      * complete. Then read all tuples from the shared tuplesort and insert
                               1163                 :                :      * them into the index.
                               1164                 :                :      *
                               1165                 :                :      * In serial mode, simply scan the table and build the index one index
                               1166                 :                :      * tuple at a time.
                               1167                 :                :      */
                               1168         [ +  + ]:            166 :     if (state->bs_leader)
                               1169                 :                :     {
                               1170                 :                :         SortCoordinate coordinate;
                               1171                 :                : 
                               1172                 :              1 :         coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData));
                               1173                 :              1 :         coordinate->isWorker = false;
                               1174                 :              1 :         coordinate->nParticipants =
                               1175                 :              1 :             state->bs_leader->nparticipanttuplesorts;
                               1176                 :              1 :         coordinate->sharedsort = state->bs_leader->sharedsort;
                               1177                 :                : 
                               1178                 :                :         /*
                               1179                 :                :          * Begin leader tuplesort.
                               1180                 :                :          *
                               1181                 :                :          * In cases where parallelism is involved, the leader receives the
                               1182                 :                :          * same share of maintenance_work_mem as a serial sort (it is
                               1183                 :                :          * generally treated in the same way as a serial sort once we return).
                               1184                 :                :          * Parallel worker Tuplesortstates will have received only a fraction
                               1185                 :                :          * of maintenance_work_mem, though.
                               1186                 :                :          *
                               1187                 :                :          * We rely on the lifetime of the Leader Tuplesortstate almost not
                               1188                 :                :          * overlapping with any worker Tuplesortstate's lifetime.  There may
                               1189                 :                :          * be some small overlap, but that's okay because we rely on leader
                               1190                 :                :          * Tuplesortstate only allocating a small, fixed amount of memory
                               1191                 :                :          * here. When its tuplesort_performsort() is called (by our caller),
                               1192                 :                :          * and significant amounts of memory are likely to be used, all
                               1193                 :                :          * workers must have already freed almost all memory held by their
                               1194                 :                :          * Tuplesortstates (they are about to go away completely, too).  The
                               1195                 :                :          * overall effect is that maintenance_work_mem always represents an
                               1196                 :                :          * absolute high watermark on the amount of memory used by a CREATE
                               1197                 :                :          * INDEX operation, regardless of the use of parallelism or any other
                               1198                 :                :          * factor.
                               1199                 :                :          */
  106                          1200                 :              1 :         state->bs_sortstate =
                               1201                 :              1 :             tuplesort_begin_index_brin(maintenance_work_mem, coordinate,
                               1202                 :                :                                        TUPLESORT_NONE);
                               1203                 :                : 
  128                          1204                 :              1 :         _brin_end_parallel(state->bs_leader, state);
                               1205                 :                :     }
                               1206                 :                :     else                        /* no parallel index build */
                               1207                 :                :     {
                               1208                 :                :         /*
                               1209                 :                :          * Now scan the relation.  No syncscan allowed here because we want
                               1210                 :                :          * the heap blocks in physical order (we want to produce the ranges
                               1211                 :                :          * starting from block 0, and the callback also relies on this to not
                               1212                 :                :          * generate summary for the same range twice).
                               1213                 :                :          */
                               1214                 :            165 :         reltuples = table_index_build_scan(heap, index, indexInfo, false, true,
                               1215                 :                :                                            brinbuildCallback, (void *) state, NULL);
                               1216                 :                : 
                               1217                 :                :         /*
                               1218                 :                :          * process the final batch
                               1219                 :                :          *
                               1220                 :                :          * XXX Note this does not update state->bs_currRangeStart, i.e. it
                               1221                 :                :          * stays set to the last range added to the index. This is OK, because
                               1222                 :                :          * that's what brin_fill_empty_ranges expects.
                               1223                 :                :          */
                               1224                 :            165 :         form_and_insert_tuple(state);
                               1225                 :                : 
                               1226                 :                :         /*
                               1227                 :                :          * Backfill the final ranges with empty data.
                               1228                 :                :          *
                               1229                 :                :          * This saves us from doing what amounts to full table scans when the
                               1230                 :                :          * index is used with a predicate like WHERE (nonnull_column IS NULL),
                               1231                 :                :          * or other very selective predicates.
                               1232                 :                :          */
                               1233                 :            165 :         brin_fill_empty_ranges(state,
                               1234                 :                :                                state->bs_currRangeStart,
                               1235                 :                :                                state->bs_maxRangeStart);
                               1236                 :                : 
                               1237                 :                :         /* track the number of relation tuples */
                               1238                 :            165 :         state->bs_reltuples = reltuples;
                               1239                 :                :     }
                               1240                 :                : 
                               1241                 :                :     /* release resources */
 3446 alvherre@alvh.no-ip.     1242                 :CBC         166 :     idxtuples = state->bs_numtuples;
  128 tomas.vondra@postgre     1243                 :GNC         166 :     reltuples = state->bs_reltuples;
 3446 alvherre@alvh.no-ip.     1244                 :CBC         166 :     brinRevmapTerminate(state->bs_rmAccess);
                               1245                 :            166 :     terminate_brin_buildstate(state);
                               1246                 :                : 
                               1247                 :                :     /*
                               1248                 :                :      * Return statistics
                               1249                 :                :      */
  580 peter@eisentraut.org     1250                 :            166 :     result = palloc_object(IndexBuildResult);
                               1251                 :                : 
 3446 alvherre@alvh.no-ip.     1252                 :            166 :     result->heap_tuples = reltuples;
                               1253                 :            166 :     result->index_tuples = idxtuples;
                               1254                 :                : 
 3010 tgl@sss.pgh.pa.us        1255                 :            166 :     return result;
                               1256                 :                : }
                               1257                 :                : 
                               1258                 :                : void
                               1259                 :              3 : brinbuildempty(Relation index)
                               1260                 :                : {
                               1261                 :                :     Buffer      metabuf;
                               1262                 :                : 
                               1263                 :                :     /* An empty BRIN index has a metapage only. */
  235 tmunro@postgresql.or     1264                 :              3 :     metabuf = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
                               1265                 :                :                                 EB_LOCK_FIRST | EB_SKIP_EXTENSION_LOCK);
                               1266                 :                : 
                               1267                 :                :     /* Initialize and xlog metabuffer. */
 3446 alvherre@alvh.no-ip.     1268                 :              3 :     START_CRIT_SECTION();
 2916 kgrittn@postgresql.o     1269   [ +  -  -  +  :              3 :     brin_metapage_init(BufferGetPage(metabuf), BrinGetPagesPerRange(index),
                                              -  + ]
                               1270                 :                :                        BRIN_CURRENT_VERSION);
 3446 alvherre@alvh.no-ip.     1271                 :              3 :     MarkBufferDirty(metabuf);
 2355 tgl@sss.pgh.pa.us        1272                 :              3 :     log_newpage_buffer(metabuf, true);
 3446 alvherre@alvh.no-ip.     1273         [ -  + ]:              3 :     END_CRIT_SECTION();
                               1274                 :                : 
                               1275                 :              3 :     UnlockReleaseBuffer(metabuf);
                               1276                 :              3 : }
                               1277                 :                : 
                               1278                 :                : /*
                               1279                 :                :  * brinbulkdelete
                               1280                 :                :  *      Since there are no per-heap-tuple index tuples in BRIN indexes,
                               1281                 :                :  *      there's not a lot we can do here.
                               1282                 :                :  *
                               1283                 :                :  * XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
                               1284                 :                :  * tuple is deleted), meaning the need to re-run summarization on the affected
                               1285                 :                :  * range.  Would need to add an extra flag in brintuples for that.
                               1286                 :                :  */
                               1287                 :                : IndexBulkDeleteResult *
 3010 tgl@sss.pgh.pa.us        1288                 :              8 : brinbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
                               1289                 :                :                IndexBulkDeleteCallback callback, void *callback_state)
                               1290                 :                : {
                               1291                 :                :     /* allocate stats if first time through, else re-use existing struct */
 3446 alvherre@alvh.no-ip.     1292         [ +  - ]:              8 :     if (stats == NULL)
  580 peter@eisentraut.org     1293                 :              8 :         stats = palloc0_object(IndexBulkDeleteResult);
                               1294                 :                : 
 3010 tgl@sss.pgh.pa.us        1295                 :              8 :     return stats;
                               1296                 :                : }
                               1297                 :                : 
                               1298                 :                : /*
                               1299                 :                :  * This routine is in charge of "vacuuming" a BRIN index: we just summarize
                               1300                 :                :  * ranges that are currently unsummarized.
                               1301                 :                :  */
                               1302                 :                : IndexBulkDeleteResult *
                               1303                 :             69 : brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
                               1304                 :                : {
                               1305                 :                :     Relation    heapRel;
                               1306                 :                : 
                               1307                 :                :     /* No-op in ANALYZE ONLY mode */
 3446 alvherre@alvh.no-ip.     1308         [ +  + ]:             69 :     if (info->analyze_only)
 3010 tgl@sss.pgh.pa.us        1309                 :              2 :         return stats;
                               1310                 :                : 
 3446 alvherre@alvh.no-ip.     1311         [ +  + ]:             67 :     if (!stats)
  580 peter@eisentraut.org     1312                 :             59 :         stats = palloc0_object(IndexBulkDeleteResult);
 3446 alvherre@alvh.no-ip.     1313                 :             67 :     stats->num_pages = RelationGetNumberOfBlocks(info->index);
                               1314                 :                :     /* rest of stats is initialized by zeroing */
                               1315                 :                : 
 1910 andres@anarazel.de       1316                 :             67 :     heapRel = table_open(IndexGetRelation(RelationGetRelid(info->index), false),
                               1317                 :                :                          AccessShareLock);
                               1318                 :                : 
 3168 alvherre@alvh.no-ip.     1319                 :             67 :     brin_vacuum_scan(info->index, info->strategy);
                               1320                 :                : 
 2354                          1321                 :             67 :     brinsummarize(info->index, heapRel, BRIN_ALL_BLOCKRANGES, false,
                               1322                 :                :                   &stats->num_index_tuples, &stats->num_index_tuples);
                               1323                 :                : 
 1910 andres@anarazel.de       1324                 :             67 :     table_close(heapRel, AccessShareLock);
                               1325                 :                : 
 3010 tgl@sss.pgh.pa.us        1326                 :             67 :     return stats;
                               1327                 :                : }
                               1328                 :                : 
                               1329                 :                : /*
                               1330                 :                :  * reloptions processor for BRIN indexes
                               1331                 :                :  */
                               1332                 :                : bytea *
                               1333                 :            473 : brinoptions(Datum reloptions, bool validate)
                               1334                 :                : {
                               1335                 :                :     static const relopt_parse_elt tab[] = {
                               1336                 :                :         {"pages_per_range", RELOPT_TYPE_INT, offsetof(BrinOptions, pagesPerRange)},
                               1337                 :                :         {"autosummarize", RELOPT_TYPE_BOOL, offsetof(BrinOptions, autosummarize)}
                               1338                 :                :     };
                               1339                 :                : 
 1622 michael@paquier.xyz      1340                 :            473 :     return (bytea *) build_reloptions(reloptions, validate,
                               1341                 :                :                                       RELOPT_KIND_BRIN,
                               1342                 :                :                                       sizeof(BrinOptions),
                               1343                 :                :                                       tab, lengthof(tab));
                               1344                 :                : }
                               1345                 :                : 
                               1346                 :                : /*
                               1347                 :                :  * SQL-callable function to scan through an index and summarize all ranges
                               1348                 :                :  * that are not currently summarized.
                               1349                 :                :  */
                               1350                 :                : Datum
 3446 alvherre@alvh.no-ip.     1351                 :             38 : brin_summarize_new_values(PG_FUNCTION_ARGS)
                               1352                 :                : {
 2570                          1353                 :             38 :     Datum       relation = PG_GETARG_DATUM(0);
                               1354                 :                : 
                               1355                 :             38 :     return DirectFunctionCall2(brin_summarize_range,
                               1356                 :                :                                relation,
                               1357                 :                :                                Int64GetDatum((int64) BRIN_ALL_BLOCKRANGES));
                               1358                 :                : }
                               1359                 :                : 
                               1360                 :                : /*
                               1361                 :                :  * SQL-callable function to summarize the indicated page range, if not already
                               1362                 :                :  * summarized.  If the second argument is BRIN_ALL_BLOCKRANGES, all
                               1363                 :                :  * unsummarized ranges are summarized.
                               1364                 :                :  */
                               1365                 :                : Datum
                               1366                 :            102 : brin_summarize_range(PG_FUNCTION_ARGS)
                               1367                 :                : {
 3446                          1368                 :            102 :     Oid         indexoid = PG_GETARG_OID(0);
 2570                          1369                 :            102 :     int64       heapBlk64 = PG_GETARG_INT64(1);
                               1370                 :                :     BlockNumber heapBlk;
                               1371                 :                :     Oid         heapoid;
                               1372                 :                :     Relation    indexRel;
                               1373                 :                :     Relation    heapRel;
                               1374                 :                :     Oid         save_userid;
                               1375                 :                :     int         save_sec_context;
                               1376                 :                :     int         save_nestlevel;
 3446                          1377                 :            102 :     double      numSummarized = 0;
                               1378                 :                : 
 2131                          1379         [ -  + ]:            102 :     if (RecoveryInProgress())
 2131 alvherre@alvh.no-ip.     1380         [ #  # ]:UBC           0 :         ereport(ERROR,
                               1381                 :                :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                               1382                 :                :                  errmsg("recovery is in progress"),
                               1383                 :                :                  errhint("BRIN control functions cannot be executed during recovery.")));
                               1384                 :                : 
 2570 alvherre@alvh.no-ip.     1385   [ +  +  +  + ]:CBC         102 :     if (heapBlk64 > BRIN_ALL_BLOCKRANGES || heapBlk64 < 0)
                               1386         [ +  - ]:             18 :         ereport(ERROR,
                               1387                 :                :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                               1388                 :                :                  errmsg("block number out of range: %lld",
                               1389                 :                :                         (long long) heapBlk64)));
                               1390                 :             84 :     heapBlk = (BlockNumber) heapBlk64;
                               1391                 :                : 
                               1392                 :                :     /*
                               1393                 :                :      * We must lock table before index to avoid deadlocks.  However, if the
                               1394                 :                :      * passed indexoid isn't an index then IndexGetRelation() will fail.
                               1395                 :                :      * Rather than emitting a not-very-helpful error message, postpone
                               1396                 :                :      * complaining, expecting that the is-it-an-index test below will fail.
                               1397                 :                :      */
 3032 tgl@sss.pgh.pa.us        1398                 :             84 :     heapoid = IndexGetRelation(indexoid, true);
                               1399         [ +  + ]:             84 :     if (OidIsValid(heapoid))
                               1400                 :                :     {
 1910 andres@anarazel.de       1401                 :             75 :         heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
                               1402                 :                : 
                               1403                 :                :         /*
                               1404                 :                :          * Autovacuum calls us.  For its benefit, switch to the table owner's
                               1405                 :                :          * userid, so that any index functions are run as that user.  Also
                               1406                 :                :          * lock down security-restricted operations and arrange to make GUC
                               1407                 :                :          * variable changes local to this command.  This is harmless, albeit
                               1408                 :                :          * unnecessary, when called from SQL, because we fail shortly if the
                               1409                 :                :          * user does not own the index.
                               1410                 :                :          */
  706 noah@leadboat.com        1411                 :             75 :         GetUserIdAndSecContext(&save_userid, &save_sec_context);
                               1412                 :             75 :         SetUserIdAndSecContext(heapRel->rd_rel->relowner,
                               1413                 :                :                                save_sec_context | SECURITY_RESTRICTED_OPERATION);
                               1414                 :             75 :         save_nestlevel = NewGUCNestLevel();
   41 jdavis@postgresql.or     1415                 :GNC          75 :         RestrictSearchPath();
                               1416                 :                :     }
                               1417                 :                :     else
                               1418                 :                :     {
 3032 tgl@sss.pgh.pa.us        1419                 :CBC           9 :         heapRel = NULL;
                               1420                 :                :         /* Set these just to suppress "uninitialized variable" warnings */
  683                          1421                 :              9 :         save_userid = InvalidOid;
                               1422                 :              9 :         save_sec_context = -1;
                               1423                 :              9 :         save_nestlevel = -1;
                               1424                 :                :     }
                               1425                 :                : 
 3446 alvherre@alvh.no-ip.     1426                 :             84 :     indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
                               1427                 :                : 
                               1428                 :                :     /* Must be a BRIN index */
 3032 tgl@sss.pgh.pa.us        1429         [ +  - ]:             75 :     if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
                               1430         [ +  + ]:             75 :         indexRel->rd_rel->relam != BRIN_AM_OID)
                               1431         [ +  - ]:              9 :         ereport(ERROR,
                               1432                 :                :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                               1433                 :                :                  errmsg("\"%s\" is not a BRIN index",
                               1434                 :                :                         RelationGetRelationName(indexRel))));
                               1435                 :                : 
                               1436                 :                :     /* User must own the index (comparable to privileges needed for VACUUM) */
  518 peter@eisentraut.org     1437   [ +  -  -  + ]:             66 :     if (heapRel != NULL && !object_ownercheck(RelationRelationId, indexoid, save_userid))
 2325 peter_e@gmx.net          1438                 :UBC           0 :         aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
 3032 tgl@sss.pgh.pa.us        1439                 :              0 :                        RelationGetRelationName(indexRel));
                               1440                 :                : 
                               1441                 :                :     /*
                               1442                 :                :      * Since we did the IndexGetRelation call above without any lock, it's
                               1443                 :                :      * barely possible that a race against an index drop/recreation could have
                               1444                 :                :      * netted us the wrong table.  Recheck.
                               1445                 :                :      */
 3032 tgl@sss.pgh.pa.us        1446   [ +  -  -  + ]:CBC          66 :     if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
 3032 tgl@sss.pgh.pa.us        1447         [ #  # ]:UBC           0 :         ereport(ERROR,
                               1448                 :                :                 (errcode(ERRCODE_UNDEFINED_TABLE),
                               1449                 :                :                  errmsg("could not open parent table of index \"%s\"",
                               1450                 :                :                         RelationGetRelationName(indexRel))));
                               1451                 :                : 
                               1452                 :                :     /* see gin_clean_pending_list() */
  167 noah@leadboat.com        1453         [ +  - ]:CBC          66 :     if (indexRel->rd_index->indisvalid)
                               1454                 :             66 :         brinsummarize(indexRel, heapRel, heapBlk, true, &numSummarized, NULL);
                               1455                 :                :     else
  167 noah@leadboat.com        1456         [ #  # ]:UBC           0 :         ereport(DEBUG1,
                               1457                 :                :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                               1458                 :                :                  errmsg("index \"%s\" is not valid",
                               1459                 :                :                         RelationGetRelationName(indexRel))));
                               1460                 :                : 
                               1461                 :                :     /* Roll back any GUC changes executed by index functions */
  706 noah@leadboat.com        1462                 :CBC          66 :     AtEOXact_GUC(false, save_nestlevel);
                               1463                 :                : 
                               1464                 :                :     /* Restore userid and security context */
                               1465                 :             66 :     SetUserIdAndSecContext(save_userid, save_sec_context);
                               1466                 :                : 
 3446 alvherre@alvh.no-ip.     1467                 :             66 :     relation_close(indexRel, ShareUpdateExclusiveLock);
                               1468                 :             66 :     relation_close(heapRel, ShareUpdateExclusiveLock);
                               1469                 :                : 
                               1470                 :             66 :     PG_RETURN_INT32((int32) numSummarized);
                               1471                 :                : }
                               1472                 :                : 
                               1473                 :                : /*
                               1474                 :                :  * SQL-callable interface to mark a range as no longer summarized
                                                    :                :  *
                                                    :                :  * Argument 0 is the OID of the index; argument 1 is a heap block number,
                                                    :                :  * passed as int64 and required to lie in [0, MaxBlockNumber].  The function
                                                    :                :  * returns void.
                                                    :                :  *
                                                    :                :  * An error is raised if recovery is in progress, if the block number is out
                                                    :                :  * of range, if the relation is not a BRIN index, if the current user does
                                                    :                :  * not own the index, or if the index's parent table cannot be determined
                                                    :                :  * consistently.  If the index is not marked valid, nothing is desummarized
                                                    :                :  * and only a DEBUG1 message is emitted.
                               1475                 :                :  */
                               1476                 :                : Datum
 2570                          1477                 :             52 : brin_desummarize_range(PG_FUNCTION_ARGS)
                               1478                 :                : {
 2524 bruce@momjian.us         1479                 :             52 :     Oid         indexoid = PG_GETARG_OID(0);
                               1480                 :             52 :     int64       heapBlk64 = PG_GETARG_INT64(1);
                               1481                 :                :     BlockNumber heapBlk;
                               1482                 :                :     Oid         heapoid;
                               1483                 :                :     Relation    heapRel;
                               1484                 :                :     Relation    indexRel;
                               1485                 :                :     bool        done;
                               1486                 :                : 
 2131 alvherre@alvh.no-ip.     1487         [ -  + ]:             52 :     if (RecoveryInProgress())
 2131 alvherre@alvh.no-ip.     1488         [ #  # ]:UBC           0 :         ereport(ERROR,
                               1489                 :                :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                               1490                 :                :                  errmsg("recovery is in progress"),
                               1491                 :                :                  errhint("BRIN control functions cannot be executed during recovery.")));
                               1492                 :                : 
                                                                           /* Validate the int64 argument before narrowing it to a BlockNumber */
 2570 alvherre@alvh.no-ip.     1493   [ +  -  +  + ]:CBC          52 :     if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
                               1494         [ +  - ]:              9 :         ereport(ERROR,
                               1495                 :                :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                               1496                 :                :                  errmsg("block number out of range: %lld",
                               1497                 :                :                         (long long) heapBlk64)));
                               1498                 :             43 :     heapBlk = (BlockNumber) heapBlk64;
                               1499                 :                : 
                               1500                 :                :     /*
                               1501                 :                :      * We must lock table before index to avoid deadlocks.  However, if the
                               1502                 :                :      * passed indexoid isn't an index then IndexGetRelation() will fail.
                               1503                 :                :      * Rather than emitting a not-very-helpful error message, postpone
                               1504                 :                :      * complaining, expecting that the is-it-an-index test below will fail.
                               1505                 :                :      *
                               1506                 :                :      * Unlike brin_summarize_range(), autovacuum never calls this.  Hence, we
                               1507                 :                :      * don't switch userid.
                               1508                 :                :      */
                               1509                 :             43 :     heapoid = IndexGetRelation(indexoid, true);
                               1510         [ +  - ]:             43 :     if (OidIsValid(heapoid))
 1910 andres@anarazel.de       1511                 :             43 :         heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
                               1512                 :                :     else
 2570 alvherre@alvh.no-ip.     1513                 :UBC           0 :         heapRel = NULL;
                               1514                 :                : 
 2570 alvherre@alvh.no-ip.     1515                 :CBC          43 :     indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
                               1516                 :                : 
                               1517                 :                :     /* Must be a BRIN index */
                               1518         [ +  - ]:             43 :     if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
                               1519         [ -  + ]:             43 :         indexRel->rd_rel->relam != BRIN_AM_OID)
 2570 alvherre@alvh.no-ip.     1520         [ #  # ]:UBC           0 :         ereport(ERROR,
                               1521                 :                :                 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
                               1522                 :                :                  errmsg("\"%s\" is not a BRIN index",
                               1523                 :                :                         RelationGetRelationName(indexRel))));
                               1524                 :                : 
                               1525                 :                :     /* User must own the index (comparable to privileges needed for VACUUM) */
  518 peter@eisentraut.org     1526         [ -  + ]:CBC          43 :     if (!object_ownercheck(RelationRelationId, indexoid, GetUserId()))
 2325 peter_e@gmx.net          1527                 :UBC           0 :         aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX,
 2570 alvherre@alvh.no-ip.     1528                 :              0 :                        RelationGetRelationName(indexRel));
                               1529                 :                : 
                               1530                 :                :     /*
                               1531                 :                :      * Since we did the IndexGetRelation call above without any lock, it's
                               1532                 :                :      * barely possible that a race against an index drop/recreation could have
                               1533                 :                :      * netted us the wrong table.  Recheck.
                               1534                 :                :      */
 2570 alvherre@alvh.no-ip.     1535   [ +  -  -  + ]:CBC          43 :     if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
 2570 alvherre@alvh.no-ip.     1536         [ #  # ]:UBC           0 :         ereport(ERROR,
                               1537                 :                :                 (errcode(ERRCODE_UNDEFINED_TABLE),
                               1538                 :                :                  errmsg("could not open parent table of index \"%s\"",
                               1539                 :                :                         RelationGetRelationName(indexRel))));
                               1540                 :                : 
                               1541                 :                :     /* see gin_clean_pending_list() */
  167 noah@leadboat.com        1542         [ +  - ]:CBC          43 :     if (indexRel->rd_index->indisvalid)
                               1543                 :                :     {
                               1544                 :                :         /* the revmap does the hard work; loop until it reports it is done */
                               1545                 :                :         do
                               1546                 :                :         {
                               1547                 :             43 :             done = brinRevmapDesummarizeRange(indexRel, heapBlk);
                               1548                 :                :         }
                               1549         [ -  + ]:             43 :         while (!done);
                               1550                 :                :     }
                               1551                 :                :     else
  167 noah@leadboat.com        1552         [ #  # ]:UBC           0 :         ereport(DEBUG1,
                               1553                 :                :                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                               1554                 :                :                  errmsg("index \"%s\" is not valid",
                               1555                 :                :                         RelationGetRelationName(indexRel))));
                               1556                 :                : 
 2570 alvherre@alvh.no-ip.     1557                 :CBC          43 :     relation_close(indexRel, ShareUpdateExclusiveLock);
                               1558                 :             43 :     relation_close(heapRel, ShareUpdateExclusiveLock);
                               1559                 :                : 
                               1560                 :             43 :     PG_RETURN_VOID();
                               1561                 :                : }
                               1562                 :                : 
                               1563                 :                : /*
                               1564                 :                :  * Build a BrinDesc used to create or scan a BRIN index
                                                    :                :  *
                                                    :                :  * A new memory context ("brin desc cxt") is created under
                                                    :                :  * CurrentMemoryContext.  The descriptor is allocated, and the per-column
                                                    :                :  * BRIN_PROCNUM_OPCINFO support functions are called, while that context is
                                                    :                :  * current; bd_context remembers it.  The caller's memory context is current
                                                    :                :  * again on return.
                                                    :                :  *
                                                    :                :  * bd_tupdesc is the descriptor obtained from RelationGetDescr(rel),
                                                    :                :  * bd_disktdesc starts out NULL, and bd_totalstored is the sum of oi_nstored
                                                    :                :  * over all indexed columns.
                               1565                 :                :  */
                               1566                 :                : BrinDesc *
 3446                          1567                 :           2254 : brin_build_desc(Relation rel)
                               1568                 :                : {
                               1569                 :                :     BrinOpcInfo **opcinfo;
                               1570                 :                :     BrinDesc   *bdesc;
                               1571                 :                :     TupleDesc   tupdesc;
                               1572                 :           2254 :     int         totalstored = 0;
                               1573                 :                :     int         keyno;
                               1574                 :                :     long        totalsize;
                               1575                 :                :     MemoryContext cxt;
                               1576                 :                :     MemoryContext oldcxt;
                               1577                 :                : 
                               1578                 :           2254 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
                               1579                 :                :                                 "brin desc cxt",
                               1580                 :                :                                 ALLOCSET_SMALL_SIZES);
                               1581                 :           2254 :     oldcxt = MemoryContextSwitchTo(cxt);
                               1582                 :           2254 :     tupdesc = RelationGetDescr(rel);
                               1583                 :                : 
                               1584                 :                :     /*
                               1585                 :                :      * Obtain BrinOpcInfo for each indexed column.  While at it, accumulate
                               1586                 :                :      * the number of columns stored, since the number is opclass-defined.
                               1587                 :                :      */
  331 tgl@sss.pgh.pa.us        1588                 :           2254 :     opcinfo = palloc_array(BrinOpcInfo *, tupdesc->natts);
 3446 alvherre@alvh.no-ip.     1589         [ +  + ]:          38049 :     for (keyno = 0; keyno < tupdesc->natts; keyno++)
                               1590                 :                :     {
                               1591                 :                :         FmgrInfo   *opcInfoFn;
 2429 andres@anarazel.de       1592                 :          35795 :         Form_pg_attribute attr = TupleDescAttr(tupdesc, keyno);
                               1593                 :                : 
 3446 alvherre@alvh.no-ip.     1594                 :          35795 :         opcInfoFn = index_getprocinfo(rel, keyno + 1, BRIN_PROCNUM_OPCINFO);
                               1595                 :                : 
                               1596                 :          71590 :         opcinfo[keyno] = (BrinOpcInfo *)
 2429 andres@anarazel.de       1597                 :          35795 :             DatumGetPointer(FunctionCall1(opcInfoFn, attr->atttypid));
 3446 alvherre@alvh.no-ip.     1598                 :          35795 :         totalstored += opcinfo[keyno]->oi_nstored;
                               1599                 :                :     }
                               1600                 :                : 
                               1601                 :                :     /* Allocate our result struct and fill it in */
                               1602                 :           2254 :     totalsize = offsetof(BrinDesc, bd_info) +
                               1603                 :           2254 :         sizeof(BrinOpcInfo *) * tupdesc->natts;
                               1604                 :                : 
                               1605                 :           2254 :     bdesc = palloc(totalsize);
                               1606                 :           2254 :     bdesc->bd_context = cxt;
                               1607                 :           2254 :     bdesc->bd_index = rel;
                               1608                 :           2254 :     bdesc->bd_tupdesc = tupdesc;
                               1609                 :           2254 :     bdesc->bd_disktdesc = NULL; /* generated lazily */
                               1610                 :           2254 :     bdesc->bd_totalstored = totalstored;
                               1611                 :                : 
                                                                           /* Copy the per-column pointers into the result; the scratch array is then freed */
                               1612         [ +  + ]:          38049 :     for (keyno = 0; keyno < tupdesc->natts; keyno++)
                               1613                 :          35795 :         bdesc->bd_info[keyno] = opcinfo[keyno];
                               1614                 :           2254 :     pfree(opcinfo);
                               1615                 :                : 
                               1616                 :           2254 :     MemoryContextSwitchTo(oldcxt);
                               1617                 :                : 
                               1618                 :           2254 :     return bdesc;
                               1619                 :                : }
                               1620                 :                : 
                                                                       /*
                                                                        * Release a BrinDesc.
                                                                        *
                                                                        * Deleting bd_context is the only cleanup performed; for a descriptor made
                                                                        * by brin_build_desc() that context holds the descriptor itself, so the
                                                                        * pointer must not be used afterwards.
                                                                        */
                               1621                 :                : void
                               1622                 :           1708 : brin_free_desc(BrinDesc *bdesc)
                               1623                 :                : {
                               1624                 :                :     /* make sure the tupdesc is still valid */
                               1625         [ -  + ]:           1708 :     Assert(bdesc->bd_tupdesc->tdrefcount >= 1);
                               1626                 :                :     /* no need for retail pfree */
                               1627                 :           1708 :     MemoryContextDelete(bdesc->bd_context);
                               1628                 :           1708 : }
                               1629                 :                : 
                               1630                 :                : /*
                               1631                 :                :  * Fetch index's statistical data into *stats
                                                    :                :  *
                                                    :                :  * Reads the index metapage (block BRIN_METAPAGE_BLKNO) under a share lock,
                                                    :                :  * copies out pagesPerRange, and sets revmapNumPages to lastRevmapPage - 1.
                                                    :                :  * The buffer is unlocked and released before returning.
                                                    :                :  *
                                                    :                :  * NOTE(review): the "- 1" presumably discounts the metapage, assuming the
                                                    :                :  * revmap starts on the block right after it -- confirm against the BRIN
                                                    :                :  * page layout.
                               1632                 :                :  */
                               1633                 :                : void
 2565                          1634                 :           5365 : brinGetStats(Relation index, BrinStatsData *stats)
                               1635                 :                : {
                               1636                 :                :     Buffer      metabuffer;
                               1637                 :                :     Page        metapage;
                               1638                 :                :     BrinMetaPageData *metadata;
                               1639                 :                : 
                               1640                 :           5365 :     metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
                               1641                 :           5365 :     LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
                               1642                 :           5365 :     metapage = BufferGetPage(metabuffer);
                               1643                 :           5365 :     metadata = (BrinMetaPageData *) PageGetContents(metapage);
                               1644                 :                : 
                               1645                 :           5365 :     stats->pagesPerRange = metadata->pagesPerRange;
                               1646                 :           5365 :     stats->revmapNumPages = metadata->lastRevmapPage - 1;
                               1647                 :                : 
                               1648                 :           5365 :     UnlockReleaseBuffer(metabuffer);
                               1649                 :           5365 : }
                               1650                 :                : 
                               1651                 :                : /*
                               1652                 :                :  * Initialize a BrinBuildState appropriate to create tuples on the given index.
                                                    :                :  *
                                                    :                :  * idxRel is the index being built; revmap is stored as bs_rmAccess;
                                                    :                :  * pagesPerRange is the range size in heap pages; tablePages is the number
                                                    :                :  * of pages in the table, used only to compute bs_maxRangeStart.
                                                    :                :  *
                                                    :                :  * The state struct is palloc'd in the current memory context, which is also
                                                    :                :  * remembered in bs_context.  All counters start at zero, no insert buffer
                                                    :                :  * is held, and the parallel-build fields (bs_leader, bs_worker_id,
                                                    :                :  * bs_sortstate) are cleared.
                                                    :                :  *
                                                    :                :  * bs_maxRangeStart is the start of the range following the last range of
                                                    :                :  * the table: e.g. tablePages = 10 and pagesPerRange = 4 gives 12, while an
                                                    :                :  * empty table gives pagesPerRange.
                               1653                 :                :  */
                               1654                 :                : static BrinBuildState *
 3446                          1655                 :            214 : initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap,
                               1656                 :                :                            BlockNumber pagesPerRange, BlockNumber tablePages)
                               1657                 :                : {
                               1658                 :                :     BrinBuildState *state;
  128 tomas.vondra@postgre     1659                 :GNC         214 :     BlockNumber lastRange = 0;
                               1660                 :                : 
  580 peter@eisentraut.org     1661                 :CBC         214 :     state = palloc_object(BrinBuildState);
                               1662                 :                : 
 3446 alvherre@alvh.no-ip.     1663                 :            214 :     state->bs_irel = idxRel;
                               1664                 :            214 :     state->bs_numtuples = 0;
  128 tomas.vondra@postgre     1665                 :GNC         214 :     state->bs_reltuples = 0;
 3446 alvherre@alvh.no-ip.     1666                 :CBC         214 :     state->bs_currentInsertBuf = InvalidBuffer;
                               1667                 :            214 :     state->bs_pagesPerRange = pagesPerRange;
                               1668                 :            214 :     state->bs_currRangeStart = 0;
                               1669                 :            214 :     state->bs_rmAccess = revmap;
                               1670                 :            214 :     state->bs_bdesc = brin_build_desc(idxRel);
                               1671                 :            214 :     state->bs_dtuple = brin_new_memtuple(state->bs_bdesc);
  128 tomas.vondra@postgre     1672                 :GNC         214 :     state->bs_leader = NULL;
                               1673                 :            214 :     state->bs_worker_id = 0;
  106                          1674                 :            214 :     state->bs_sortstate = NULL;
                               1678                 :                : 
                               1679                 :                :     /* Remember the memory context to use for an empty tuple, if needed. */
                               1680                 :            214 :     state->bs_context = CurrentMemoryContext;
                               1681                 :            214 :     state->bs_emptyTuple = NULL;
                               1682                 :            214 :     state->bs_emptyTupleLen = 0;
                               1683                 :                : 
                               1684                 :                :     /*
                               1685                 :                :      * Calculate the start of the last page range. Page numbers are 0-based,
                               1686                 :                :      * so to calculate the index we need to subtract one. The integer division
                               1687                 :                :      * gives us the index of the page range.
                               1688                 :                :      */
                               1689         [ +  + ]:            214 :     if (tablePages > 0)
                               1690                 :            166 :         lastRange = ((tablePages - 1) / pagesPerRange) * pagesPerRange;
                               1691                 :                : 
                               1692                 :                :     /* Now calculate the start of the next range. */
                               1693                 :            214 :     state->bs_maxRangeStart = lastRange + state->bs_pagesPerRange;
                               1694                 :                : 
 3446 alvherre@alvh.no-ip.     1695                 :CBC         214 :     return state;
                               1696                 :                : }
                               1697                 :                : 
                               1698                 :                : /*
                               1699                 :                :  * Release resources associated with a BrinBuildState.
                                                    :                :  *
                                                    :                :  * If an insert buffer is still held it is released, and that page's free
                                                    :                :  * space is recorded in the index's FSM.  The BrinDesc, the in-memory tuple
                                                    :                :  * and the state struct itself are then freed, so the caller must not use
                                                    :                :  * "state" afterwards.  bs_rmAccess is not touched here.
                               1700                 :                :  */
                               1701                 :                : static void
                               1702                 :            211 : terminate_brin_buildstate(BrinBuildState *state)
                               1703                 :                : {
                               1704                 :                :     /*
                               1705                 :                :      * Release the last index buffer used.  We might as well ensure that
                               1706                 :                :      * whatever free space remains in that page is available in FSM, too.
                               1707                 :                :      */
                               1708         [ +  + ]:            211 :     if (!BufferIsInvalid(state->bs_currentInsertBuf))
                               1709                 :                :     {
                               1710                 :                :         Page        page;
                               1711                 :                :         Size        freespace;
                               1712                 :                :         BlockNumber blk;
                               1713                 :                : 
                                                                               /* fetch what we need from the page before releasing the buffer */
 2916 kgrittn@postgresql.o     1714                 :            166 :         page = BufferGetPage(state->bs_currentInsertBuf);
 2202 tgl@sss.pgh.pa.us        1715                 :            166 :         freespace = PageGetFreeSpace(page);
                               1716                 :            166 :         blk = BufferGetBlockNumber(state->bs_currentInsertBuf);
 3446 alvherre@alvh.no-ip.     1717                 :            166 :         ReleaseBuffer(state->bs_currentInsertBuf);
 1804 akapila@postgresql.o     1718                 :            166 :         RecordPageWithFreeSpace(state->bs_irel, blk, freespace);
 2202 tgl@sss.pgh.pa.us        1719                 :            166 :         FreeSpaceMapVacuumRange(state->bs_irel, blk, blk + 1);
                               1720                 :                :     }
                               1721                 :                : 
 3446 alvherre@alvh.no-ip.     1722                 :            211 :     brin_free_desc(state->bs_bdesc);
                               1723                 :            211 :     pfree(state->bs_dtuple);
                               1724                 :            211 :     pfree(state);
                               1725                 :            211 : }
                               1726                 :                : 
                               1727                 :                : /*
                               1728                 :                :  * On the given BRIN index, summarize the heap page range that corresponds
                               1729                 :                :  * to the heap block number given.
                               1730                 :                :  *
                               1731                 :                :  * This routine can run in parallel with insertions into the heap.  To avoid
                               1732                 :                :  * missing those values from the summary tuple, we first insert a placeholder
                               1733                 :                :  * index tuple into the index, then execute the heap scan; transactions
                               1734                 :                :  * concurrent with the scan update the placeholder tuple.  After the scan, we
                               1735                 :                :  * union the placeholder tuple with the one computed by this routine.  The
                               1736                 :                :  * update of the index value happens in a loop, so that if somebody updates
                               1737                 :                :  * the placeholder tuple after we read it, we detect the case and try again.
                               1738                 :                :  * This ensures that the concurrently inserted tuples are not lost.
                               1739                 :                :  *
                               1740                 :                :  * A further corner case is this routine being asked to summarize the partial
                               1741                 :                :  * range at the end of the table.  heapNumBlocks is the (possibly outdated)
                               1742                 :                :  * table size; if we notice that the requested range lies beyond that size,
                               1743                 :                :  * we re-compute the table size after inserting the placeholder tuple, to
                               1744                 :                :  * avoid missing pages that were appended recently.
                               1745                 :                :  */
                               1746                 :                : static void
                               1747                 :           1473 : summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
                               1748                 :                :                 BlockNumber heapBlk, BlockNumber heapNumBlks)
                               1749                 :                : {
                               1750                 :                :     Buffer      phbuf;
                               1751                 :                :     BrinTuple  *phtup;
                               1752                 :                :     Size        phsz;
                               1753                 :                :     OffsetNumber offset;
                               1754                 :                :     BlockNumber scanNumBlks;
                               1755                 :                : 
                               1756                 :                :     /*
                               1757                 :                :      * Insert the placeholder tuple
                               1758                 :                :      */
                               1759                 :           1473 :     phbuf = InvalidBuffer;
                               1760                 :           1473 :     phtup = brin_form_placeholder_tuple(state->bs_bdesc, heapBlk, &phsz);
                               1761                 :           1473 :     offset = brin_doinsert(state->bs_irel, state->bs_pagesPerRange,
                               1762                 :                :                            state->bs_rmAccess, &phbuf,
                               1763                 :                :                            heapBlk, phtup, phsz);
                               1764                 :                : 
                               1765                 :                :     /*
                               1766                 :                :      * Compute range end.  We hold ShareUpdateExclusive lock on table, so it
                               1767                 :                :      * cannot shrink concurrently (but it can grow).
                               1768                 :                :      */
 2354                          1769         [ -  + ]:           1473 :     Assert(heapBlk % state->bs_pagesPerRange == 0);
                               1770         [ +  + ]:           1473 :     if (heapBlk + state->bs_pagesPerRange > heapNumBlks)
                               1771                 :                :     {
                               1772                 :                :         /*
                               1773                 :                :          * If we're asked to scan what we believe to be the final range on the
                               1774                 :                :          * table (i.e. a range that might be partial) we need to recompute our
                               1775                 :                :          * idea of what the latest page is after inserting the placeholder
                               1776                 :                :          * tuple.  Anyone that grows the table later will update the
                               1777                 :                :          * placeholder tuple, so it doesn't matter that we won't scan these
                               1778                 :                :          * pages ourselves.  Careful: the table might have been extended
                               1779                 :                :          * beyond the current range, so clamp our result.
                               1780                 :                :          *
                               1781                 :                :          * Fortunately, this should occur infrequently.
                               1782                 :                :          */
                               1783         [ +  - ]:             12 :         scanNumBlks = Min(RelationGetNumberOfBlocks(heapRel) - heapBlk,
                               1784                 :                :                           state->bs_pagesPerRange);
                               1785                 :                :     }
                               1786                 :                :     else
                               1787                 :                :     {
                               1788                 :                :         /* Easy case: range is known to be complete */
                               1789                 :           1461 :         scanNumBlks = state->bs_pagesPerRange;
                               1790                 :                :     }
                               1791                 :                : 
                               1792                 :                :     /*
                               1793                 :                :      * Execute the partial heap scan covering the heap blocks in the specified
                               1794                 :                :      * page range, summarizing the heap tuples in it.  This scan stops just
                               1795                 :                :      * short of brinbuildCallback creating the new index entry.
                               1796                 :                :      *
                               1797                 :                :      * Note that it is critical we use the "any visible" mode of
                               1798                 :                :      * table_index_build_range_scan here: otherwise, we would miss tuples
                               1799                 :                :      * inserted by transactions that are still in progress, among other corner
                               1800                 :                :      * cases.
                               1801                 :                :      */
 3446                          1802                 :           1473 :     state->bs_currRangeStart = heapBlk;
 1839                          1803                 :           1473 :     table_index_build_range_scan(heapRel, state->bs_irel, indexInfo, false, true, false,
                               1804                 :                :                                  heapBlk, scanNumBlks,
                               1805                 :                :                                  brinbuildCallback, (void *) state, NULL);
                               1806                 :                : 
                               1807                 :                :     /*
                               1808                 :                :      * Now we update the values obtained by the scan with the placeholder
                               1809                 :                :      * tuple.  We do this in a loop which only terminates if we're able to
                               1810                 :                :      * update the placeholder tuple successfully; if we are not, this means
                               1811                 :                :      * somebody else modified the placeholder tuple after we read it.
                               1812                 :                :      */
                               1813                 :                :     for (;;)
 3446 alvherre@alvh.no-ip.     1814                 :UBC           0 :     {
                               1815                 :                :         BrinTuple  *newtup;
                               1816                 :                :         Size        newsize;
                               1817                 :                :         bool        didupdate;
                               1818                 :                :         bool        samepage;
                               1819                 :                : 
 3446 alvherre@alvh.no-ip.     1820         [ -  + ]:CBC        1473 :         CHECK_FOR_INTERRUPTS();
                               1821                 :                : 
                               1822                 :                :         /*
                               1823                 :                :          * Form the new summary tuple and try to replace the placeholder.
                               1824                 :                :          */
                               1825                 :           1473 :         newtup = brin_form_tuple(state->bs_bdesc,
                               1826                 :                :                                  heapBlk, state->bs_dtuple, &newsize);
                               1827                 :           1473 :         samepage = brin_can_do_samepage_update(phbuf, phsz, newsize);
                               1828                 :                :         didupdate =
                               1829                 :           1473 :             brin_doupdate(state->bs_irel, state->bs_pagesPerRange,
                               1830                 :                :                           state->bs_rmAccess, heapBlk, phbuf, offset,
                               1831                 :                :                           phtup, phsz, newtup, newsize, samepage);
                               1832                 :           1473 :         brin_free_tuple(phtup);
                               1833                 :           1473 :         brin_free_tuple(newtup);
                               1834                 :                : 
                               1835                 :                :         /* If the update succeeded, we're done. */
                               1836         [ +  - ]:           1473 :         if (didupdate)
                               1837                 :           1473 :             break;
                               1838                 :                : 
                               1839                 :                :         /*
                               1840                 :                :          * If the update didn't work, it might be because somebody updated the
                               1841                 :                :          * placeholder tuple concurrently.  Extract the new version, union it
                               1842                 :                :          * with the values we have from the scan, and start over.  (There are
                               1843                 :                :          * other reasons for the update to fail, but it's simple to treat them
                               1844                 :                :          * the same.)
                               1845                 :                :          */
 3446 alvherre@alvh.no-ip.     1846                 :UBC           0 :         phtup = brinGetTupleForHeapBlock(state->bs_rmAccess, heapBlk, &phbuf,
                               1847                 :                :                                          &offset, &phsz, BUFFER_LOCK_SHARE);
                               1848                 :                :         /* the placeholder tuple must exist */
                               1849         [ #  # ]:              0 :         if (phtup == NULL)
                               1850         [ #  # ]:              0 :             elog(ERROR, "missing placeholder tuple");
 2564                          1851                 :              0 :         phtup = brin_copy_tuple(phtup, phsz, NULL, NULL);
 3446                          1852                 :              0 :         LockBuffer(phbuf, BUFFER_LOCK_UNLOCK);
                               1853                 :                : 
                               1854                 :                :         /* merge it into the tuple from the heap scan */
                               1855                 :              0 :         union_tuples(state->bs_bdesc, state->bs_dtuple, phtup);
                               1856                 :                :     }
                               1857                 :                : 
 3446 alvherre@alvh.no-ip.     1858                 :CBC        1473 :     ReleaseBuffer(phbuf);
                               1859                 :           1473 : }
                               1860                 :                : 
                               1861                 :                : /*
                               1862                 :                :  * Summarize page ranges that are not already summarized.  If pageRange is
                               1863                 :                :  * BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
                               1864                 :                :  * page range containing the given heap page number is scanned.
                               1865                 :                :  * If include_partial is true, then the partial range at the end of the table
                               1866                 :                :  * is summarized, otherwise not.
                               1867                 :                :  *
                               1868                 :                :  * For each new index tuple inserted, *numSummarized (if not NULL) is
                               1869                 :                :  * incremented; for each existing tuple, *numExisting (if not NULL) is
                               1870                 :                :  * incremented.
                               1871                 :                :  */
                               1872                 :                : static void
 2570                          1873                 :            133 : brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
                               1874                 :                :               bool include_partial, double *numSummarized, double *numExisting)
                               1875                 :                : {
                               1876                 :                :     BrinRevmap *revmap;
 3446                          1877                 :            133 :     BrinBuildState *state = NULL;
                               1878                 :            133 :     IndexInfo  *indexInfo = NULL;
                               1879                 :                :     BlockNumber heapNumBlocks;
                               1880                 :                :     BlockNumber pagesPerRange;
                               1881                 :                :     Buffer      buf;
                               1882                 :                :     BlockNumber startBlk;
                               1883                 :                : 
  219 tmunro@postgresql.or     1884                 :GNC         133 :     revmap = brinRevmapInitialize(index, &pagesPerRange);
                               1885                 :                : 
                               1886                 :                :     /* determine range of pages to process */
 2354 alvherre@alvh.no-ip.     1887                 :CBC         133 :     heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
 2570                          1888         [ +  + ]:            133 :     if (pageRange == BRIN_ALL_BLOCKRANGES)
                               1889                 :             96 :         startBlk = 0;
                               1890                 :                :     else
                               1891                 :                :     {
                               1892                 :             37 :         startBlk = (pageRange / pagesPerRange) * pagesPerRange;
 2354                          1893                 :             37 :         heapNumBlocks = Min(heapNumBlocks, startBlk + pagesPerRange);
                               1894                 :                :     }
                               1895         [ -  + ]:            133 :     if (startBlk > heapNumBlocks)
                               1896                 :                :     {
                               1897                 :                :         /* Nothing to do if start point is beyond end of table */
 2354 alvherre@alvh.no-ip.     1898                 :UBC           0 :         brinRevmapTerminate(revmap);
                               1899                 :              0 :         return;
                               1900                 :                :     }
                               1901                 :                : 
                               1902                 :                :     /*
                               1903                 :                :      * Scan the revmap to find unsummarized items.
                               1904                 :                :      */
 3446 alvherre@alvh.no-ip.     1905                 :CBC         133 :     buf = InvalidBuffer;
 2354                          1906         [ +  + ]:          10171 :     for (; startBlk < heapNumBlocks; startBlk += pagesPerRange)
                               1907                 :                :     {
                               1908                 :                :         BrinTuple  *tup;
                               1909                 :                :         OffsetNumber off;
                               1910                 :                : 
                               1911                 :                :         /*
                               1912                 :                :          * Unless requested to summarize even a partial range, go away now if
                               1913                 :                :          * we think the next range is partial.  Caller would pass true when it
                               1914                 :                :          * is typically run once bulk data loading is done
                               1915                 :                :          * (brin_summarize_new_values), and false when it is typically the
                               1916                 :                :          * result of an arbitrarily-scheduled maintenance command (vacuuming).
                               1917                 :                :          */
                               1918         [ +  + ]:          10089 :         if (!include_partial &&
                               1919         [ +  + ]:           1718 :             (startBlk + pagesPerRange > heapNumBlocks))
                               1920                 :             51 :             break;
                               1921                 :                : 
 3446                          1922         [ -  + ]:          10038 :         CHECK_FOR_INTERRUPTS();
                               1923                 :                : 
 2354                          1924                 :          10038 :         tup = brinGetTupleForHeapBlock(revmap, startBlk, &buf, &off, NULL,
                               1925                 :                :                                        BUFFER_LOCK_SHARE);
 3446                          1926         [ +  + ]:          10038 :         if (tup == NULL)
                               1927                 :                :         {
                               1928                 :                :             /* no revmap entry for this heap range. Summarize it. */
                               1929         [ +  + ]:           1473 :             if (state == NULL)
                               1930                 :                :             {
                               1931                 :                :                 /* first time through */
                               1932         [ -  + ]:             45 :                 Assert(!indexInfo);
                               1933                 :             45 :                 state = initialize_brin_buildstate(index, revmap,
                               1934                 :                :                                                    pagesPerRange,
                               1935                 :                :                                                    InvalidBlockNumber);
                               1936                 :             45 :                 indexInfo = BuildIndexInfo(index);
                               1937                 :                :             }
 2354                          1938                 :           1473 :             summarize_range(indexInfo, state, heapRel, startBlk, heapNumBlocks);
                               1939                 :                : 
                               1940                 :                :             /* and re-initialize state for the next range */
 3446                          1941                 :           1473 :             brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc);
                               1942                 :                : 
                               1943         [ +  - ]:           1473 :             if (numSummarized)
                               1944                 :           1473 :                 *numSummarized += 1.0;
                               1945                 :                :         }
                               1946                 :                :         else
                               1947                 :                :         {
                               1948         [ +  + ]:           8565 :             if (numExisting)
                               1949                 :           1615 :                 *numExisting += 1.0;
                               1950                 :           8565 :             LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                               1951                 :                :         }
                               1952                 :                :     }
                               1953                 :                : 
                               1954         [ +  + ]:            133 :     if (BufferIsValid(buf))
                               1955                 :            100 :         ReleaseBuffer(buf);
                               1956                 :                : 
                               1957                 :                :     /* free resources */
                               1958                 :            133 :     brinRevmapTerminate(revmap);
                               1959         [ +  + ]:            133 :     if (state)
                               1960                 :                :     {
                               1961                 :             45 :         terminate_brin_buildstate(state);
 3175                          1962                 :             45 :         pfree(indexInfo);
                               1963                 :                :     }
                               1964                 :                : }
                               1965                 :                : 
                               1966                 :                : /*
                               1967                 :                :  * Given a deformed tuple in the build state, convert it into the on-disk
                               1968                 :                :  * format and insert it into the index, making the revmap point to it.
                               1969                 :                :  */
                               1970                 :                : static void
 3446                          1971                 :           1299 : form_and_insert_tuple(BrinBuildState *state)
                               1972                 :                : {
                               1973                 :                :     BrinTuple  *tup;
                               1974                 :                :     Size        size;
                               1975                 :                : 
                               1976                 :           1299 :     tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
                               1977                 :                :                           state->bs_dtuple, &size);
                               1978                 :           1299 :     brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
                               1979                 :                :                   &state->bs_currentInsertBuf, state->bs_currRangeStart,
                               1980                 :                :                   tup, size);
                               1981                 :           1299 :     state->bs_numtuples++;
                               1982                 :                : 
                               1983                 :           1299 :     pfree(tup);
                               1984                 :           1299 : }
                               1985                 :                : 
                               1986                 :                : /*
                               1987                 :                :  * Given a deformed tuple in the build state, convert it into the on-disk
                               1988                 :                :  * format and write it to a (shared) tuplesort (the leader will insert it
                               1989                 :                :  * into the index later).
                               1990                 :                :  */
                               1991                 :                : static void
  128 tomas.vondra@postgre     1992                 :GNC          27 : form_and_spill_tuple(BrinBuildState *state)
                               1993                 :                : {
                               1994                 :                :     BrinTuple  *tup;
                               1995                 :                :     Size        size;
                               1996                 :                : 
                               1997                 :                :     /* don't insert empty tuples in parallel build */
                               1998         [ +  + ]:             27 :     if (state->bs_dtuple->bt_empty_range)
                               1999                 :              3 :         return;
                               2000                 :                : 
                               2001                 :             24 :     tup = brin_form_tuple(state->bs_bdesc, state->bs_currRangeStart,
                               2002                 :                :                           state->bs_dtuple, &size);
                               2003                 :                : 
                               2004                 :                :     /* write the BRIN tuple to the tuplesort */
  106                          2005                 :             24 :     tuplesort_putbrintuple(state->bs_sortstate, tup, size);
                               2006                 :                : 
  128                          2007                 :             24 :     state->bs_numtuples++;
                               2008                 :                : 
                               2009                 :             24 :     pfree(tup);
                               2010                 :                : }
                               2011                 :                : 
                               2012                 :                : /*
                               2013                 :                :  * Given two deformed tuples, adjust the first one so that it's consistent
                               2014                 :                :  * with the summary values in both.
                               2015                 :                :  */
                               2016                 :                : static void
 3446 alvherre@alvh.no-ip.     2017                 :GBC           8 : union_tuples(BrinDesc *bdesc, BrinMemTuple *a, BrinTuple *b)
                               2018                 :                : {
                               2019                 :                :     int         keyno;
                               2020                 :                :     BrinMemTuple *db;
                               2021                 :                :     MemoryContext cxt;
                               2022                 :                :     MemoryContext oldcxt;
                               2023                 :                : 
                               2024                 :                :     /* Use our own memory context to avoid retail pfree */
                               2025                 :              8 :     cxt = AllocSetContextCreate(CurrentMemoryContext,
                               2026                 :                :                                 "brin union",
                               2027                 :                :                                 ALLOCSET_DEFAULT_SIZES);
                               2028                 :              8 :     oldcxt = MemoryContextSwitchTo(cxt);
 2564                          2029                 :              8 :     db = brin_deform_tuple(bdesc, b, NULL);
 3446                          2030                 :              8 :     MemoryContextSwitchTo(oldcxt);
                               2031                 :                : 
                               2032                 :                :     /*
                               2033                 :                :      * Check if the ranges are empty.
                               2034                 :                :      *
                               2035                 :                :      * If at least one of them is empty, we don't need to call per-key union
                               2036                 :                :      * functions at all. If "b" is empty, we just use "a" as the result (it
                               2037                 :                :      * might be empty too, but that's fine). If "a" is empty but "b" is not,
                               2038                 :                :      * we use "b" as the result (but we have to copy the data into "a" first).
                               2039                 :                :      *
                               2040                 :                :      * Only when both ranges are non-empty do we actually do the per-key merge.
                               2041                 :                :      */
                               2042                 :                : 
                               2043                 :                :     /* If "b" is empty - ignore it and just use "a" (even if it's empty etc.). */
  331 tomas.vondra@postgre     2044         [ -  + ]:              8 :     if (db->bt_empty_range)
                               2045                 :                :     {
                               2046                 :                :         /* skip the per-key merge */
  331 tomas.vondra@postgre     2047                 :UBC           0 :         MemoryContextDelete(cxt);
                               2048                 :              0 :         return;
                               2049                 :                :     }
                               2050                 :                : 
                               2051                 :                :     /*
                               2052                 :                :      * Now we know "b" is not empty. If "a" is empty, then "b" is the result.
                               2053                 :                :      * But we need to copy the data from "b" to "a" first, because that's how
                               2054                 :                :      * we pass the result out.
                               2055                 :                :      *
                               2056                 :                :      * We have to copy all the global/per-key flags etc. too.
                               2057                 :                :      */
  331 tomas.vondra@postgre     2058         [ -  + ]:GBC           8 :     if (a->bt_empty_range)
                               2059                 :                :     {
  331 tomas.vondra@postgre     2060         [ #  # ]:UBC           0 :         for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
                               2061                 :                :         {
                               2062                 :                :             int         i;
                               2063                 :              0 :             BrinValues *col_a = &a->bt_columns[keyno];
                               2064                 :              0 :             BrinValues *col_b = &db->bt_columns[keyno];
                               2065                 :              0 :             BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
                               2066                 :                : 
                               2067                 :              0 :             col_a->bv_allnulls = col_b->bv_allnulls;
                               2068                 :              0 :             col_a->bv_hasnulls = col_b->bv_hasnulls;
                               2069                 :                : 
                               2070                 :                :             /* If "b" has no data, we're done. */
                               2071         [ #  # ]:              0 :             if (col_b->bv_allnulls)
                               2072                 :              0 :                 continue;
                               2073                 :                : 
                               2074         [ #  # ]:              0 :             for (i = 0; i < opcinfo->oi_nstored; i++)
                               2075                 :              0 :                 col_a->bv_values[i] =
                               2076                 :              0 :                     datumCopy(col_b->bv_values[i],
                               2077                 :              0 :                               opcinfo->oi_typcache[i]->typbyval,
                               2078                 :              0 :                               opcinfo->oi_typcache[i]->typlen);
                               2079                 :                :         }
                               2080                 :                : 
                               2081                 :                :         /* "a" started empty, but "b" was not empty, so remember that */
                               2082                 :              0 :         a->bt_empty_range = false;
                               2083                 :                : 
                               2084                 :                :         /* skip the per-key merge */
                               2085                 :              0 :         MemoryContextDelete(cxt);
                               2086                 :              0 :         return;
                               2087                 :                :     }
                               2088                 :                : 
                               2089                 :                :     /* Now we know neither range is empty. */
 3446 alvherre@alvh.no-ip.     2090         [ +  + ]:GBC          40 :     for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
                               2091                 :                :     {
                               2092                 :                :         FmgrInfo   *unionFn;
                               2093                 :             32 :         BrinValues *col_a = &a->bt_columns[keyno];
                               2094                 :             32 :         BrinValues *col_b = &db->bt_columns[keyno];
 1118 tomas.vondra@postgre     2095                 :             32 :         BrinOpcInfo *opcinfo = bdesc->bd_info[keyno];
                               2096                 :                : 
                               2097         [ +  - ]:             32 :         if (opcinfo->oi_regular_nulls)
                               2098                 :                :         {
                               2099                 :                :             /* Does the "b" summary represent any NULL values? */
  332                          2100   [ +  +  -  + ]:             32 :             bool        b_has_nulls = (col_b->bv_hasnulls || col_b->bv_allnulls);
                               2101                 :                : 
                               2102                 :                :             /* Adjust "hasnulls". */
                               2103   [ +  +  +  + ]:             32 :             if (!col_a->bv_allnulls && b_has_nulls)
 1118                          2104                 :             23 :                 col_a->bv_hasnulls = true;
                               2105                 :                : 
                               2106                 :                :             /* If there are no values in B, there's nothing left to do. */
                               2107         [ -  + ]:             32 :             if (col_b->bv_allnulls)
 1118 tomas.vondra@postgre     2108                 :UBC           0 :                 continue;
                               2109                 :                : 
                               2110                 :                :             /*
                               2111                 :                :              * Adjust "allnulls".  If A doesn't have values, just copy the
                               2112                 :                :              * values from B into A, and we're done.  We cannot run the
                               2113                 :                :              * operators in this case, because values in A might contain
                               2114                 :                :              * garbage.  Note we already established that B contains values.
                               2115                 :                :              *
                               2116                 :                :              * Also adjust "hasnulls" in order not to forget the summary
                               2117                 :                :              * represents NULL values. This is not redundant with the earlier
                               2118                 :                :              * update, because that only happens when allnulls=false.
                               2119                 :                :              */
 1118 tomas.vondra@postgre     2120         [ +  + ]:GBC          32 :             if (col_a->bv_allnulls)
                               2121                 :              2 :             {
                               2122                 :                :                 int         i;
                               2123                 :                : 
                               2124                 :              2 :                 col_a->bv_allnulls = false;
  332                          2125                 :              2 :                 col_a->bv_hasnulls = true;
                               2126                 :                : 
 1118                          2127         [ +  + ]:              5 :                 for (i = 0; i < opcinfo->oi_nstored; i++)
                               2128                 :              3 :                     col_a->bv_values[i] =
                               2129                 :              3 :                         datumCopy(col_b->bv_values[i],
                               2130                 :              3 :                                   opcinfo->oi_typcache[i]->typbyval,
                               2131                 :              3 :                                   opcinfo->oi_typcache[i]->typlen);
                               2132                 :                : 
                               2133                 :              2 :                 continue;
                               2134                 :                :             }
                               2135                 :                :         }
                               2136                 :                : 
 3446 alvherre@alvh.no-ip.     2137                 :             30 :         unionFn = index_getprocinfo(bdesc->bd_index, keyno + 1,
                               2138                 :                :                                     BRIN_PROCNUM_UNION);
                               2139                 :             30 :         FunctionCall3Coll(unionFn,
                               2140                 :             30 :                           bdesc->bd_index->rd_indcollation[keyno],
                               2141                 :                :                           PointerGetDatum(bdesc),
                               2142                 :                :                           PointerGetDatum(col_a),
                               2143                 :                :                           PointerGetDatum(col_b));
                               2144                 :                :     }
                               2145                 :                : 
                               2146                 :              8 :     MemoryContextDelete(cxt);
                               2147                 :                : }
                               2148                 :                : 
                               2149                 :                : /*
                               2150                 :                :  * brin_vacuum_scan
                               2151                 :                :  *      Do a complete scan of the index during VACUUM.
                               2152                 :                :  *
                               2153                 :                :  * This routine scans the complete index looking for uncataloged index pages,
                               2154                 :                :  * i.e. those that might have been lost due to a crash after index extension
                               2155                 :                :  * and such.
                               2156                 :                :  */
                               2157                 :                : static void
 3168 alvherre@alvh.no-ip.     2158                 :CBC          67 : brin_vacuum_scan(Relation idxrel, BufferAccessStrategy strategy)
                               2159                 :                : {
                               2160                 :                :     BlockNumber nblocks;
                               2161                 :                :     BlockNumber blkno;
                               2162                 :                : 
                               2163                 :                :     /*
                               2164                 :                :      * Scan the index in physical order, and clean up any possible mess in
                               2165                 :                :      * each page.
                               2166                 :                :      */
 2202 tgl@sss.pgh.pa.us        2167                 :             67 :     nblocks = RelationGetNumberOfBlocks(idxrel);
                               2168         [ +  + ]:            358 :     for (blkno = 0; blkno < nblocks; blkno++)
                               2169                 :                :     {
                               2170                 :                :         Buffer      buf;
                               2171                 :                : 
 3168 alvherre@alvh.no-ip.     2172         [ -  + ]:            291 :         CHECK_FOR_INTERRUPTS();
                               2173                 :                : 
                               2174                 :            291 :         buf = ReadBufferExtended(idxrel, MAIN_FORKNUM, blkno,
                               2175                 :                :                                  RBM_NORMAL, strategy);
                               2176                 :                : 
 2202 tgl@sss.pgh.pa.us        2177                 :            291 :         brin_page_cleanup(idxrel, buf);
                               2178                 :                : 
 3168 alvherre@alvh.no-ip.     2179                 :            291 :         ReleaseBuffer(buf);
                               2180                 :                :     }
                               2181                 :                : 
                               2182                 :                :     /*
                               2183                 :                :      * Update all upper pages in the index's FSM, as well.  This ensures not
                               2184                 :                :      * only that we propagate leaf-page FSM updates made by brin_page_cleanup,
                               2185                 :                :      * but also that any pre-existing damage or out-of-dateness is repaired.
                               2186                 :                :      */
 2202 tgl@sss.pgh.pa.us        2187                 :             67 :     FreeSpaceMapVacuum(idxrel);
 3168 alvherre@alvh.no-ip.     2188                 :             67 : }
                               2189                 :                : 
                               2190                 :                : static bool
 1118 tomas.vondra@postgre     2191                 :         393651 : add_values_to_range(Relation idxRel, BrinDesc *bdesc, BrinMemTuple *dtup,
                               2192                 :                :                     const Datum *values, const bool *nulls)
                               2193                 :                : {
                               2194                 :                :     int         keyno;
                               2195                 :                : 
                               2196                 :                :     /* If the range starts empty, we're certainly going to modify it. */
  331                          2197                 :         393651 :     bool        modified = dtup->bt_empty_range;
                               2198                 :                : 
                               2199                 :                :     /*
                               2200                 :                :      * Compare the key values of the new tuple to the stored index values; our
                               2201                 :                :      * deformed tuple will get updated if the new tuple doesn't fit the
                               2202                 :                :      * original range (note this means we can't break out of the loop early).
                               2203                 :                :      * Make a note of whether this happens, so that we know to insert the
                               2204                 :                :      * modified tuple later.
                               2205                 :                :      */
 1118                          2206         [ +  + ]:         932175 :     for (keyno = 0; keyno < bdesc->bd_tupdesc->natts; keyno++)
                               2207                 :                :     {
                               2208                 :                :         Datum       result;
                               2209                 :                :         BrinValues *bval;
                               2210                 :                :         FmgrInfo   *addValue;
                               2211                 :                :         bool        has_nulls;
                               2212                 :                : 
                               2213                 :         538524 :         bval = &dtup->bt_columns[keyno];
                               2214                 :                : 
                               2215                 :                :         /*
                               2216                 :                :          * Does the range have actual NULL values? Either of the flags can be
                               2217                 :                :          * set, but we ignore the state before adding the first row.
                               2218                 :                :          *
                               2219                 :                :          * We have to remember this, because we'll modify the flags and we
                               2220                 :                :          * need to know if the range started as empty.
                               2221                 :                :          */
  331                          2222         [ +  + ]:        1058660 :         has_nulls = ((!dtup->bt_empty_range) &&
                               2223   [ +  +  +  + ]:         520136 :                      (bval->bv_hasnulls || bval->bv_allnulls));
                               2224                 :                : 
                               2225                 :                :         /*
                               2226                 :                :          * If the value we're adding is NULL, handle it locally. Otherwise
                               2227                 :                :          * call the BRIN_PROCNUM_ADDVALUE procedure.
                               2228                 :                :          */
 1118                          2229   [ +  -  +  + ]:         538524 :         if (bdesc->bd_info[keyno]->oi_regular_nulls && nulls[keyno])
                               2230                 :                :         {
                               2231                 :                :             /*
                               2232                 :                :              * If the new value is null, we record that we saw it if it's the
                               2233                 :                :              * first one; otherwise, there's nothing to do.
                               2234                 :                :              */
                               2235         [ +  + ]:          13757 :             if (!bval->bv_hasnulls)
                               2236                 :                :             {
                               2237                 :           1947 :                 bval->bv_hasnulls = true;
                               2238                 :           1947 :                 modified = true;
                               2239                 :                :             }
                               2240                 :                : 
                               2241                 :          13757 :             continue;
                               2242                 :                :         }
                               2243                 :                : 
                               2244                 :         524767 :         addValue = index_getprocinfo(idxRel, keyno + 1,
                               2245                 :                :                                      BRIN_PROCNUM_ADDVALUE);
                               2246                 :         524767 :         result = FunctionCall4Coll(addValue,
                               2247                 :         524767 :                                    idxRel->rd_indcollation[keyno],
                               2248                 :                :                                    PointerGetDatum(bdesc),
                               2249                 :                :                                    PointerGetDatum(bval),
                               2250                 :         524767 :                                    values[keyno],
                               2251                 :         524767 :                                    nulls[keyno]);
                               2252                 :                :         /* if that returned true, we need to insert the updated tuple */
                               2253                 :         524767 :         modified |= DatumGetBool(result);
                               2254                 :                : 
                               2255                 :                :         /*
                               2256                 :                :          * If the range had actual NULL values (i.e. did not start empty),
                               2257                 :                :          * make sure we don't forget about the NULL values. Either the
                               2258                 :                :          * allnulls flag is still set to true, or (if the opclass cleared it)
                               2259                 :                :          * we need to set hasnulls=true.
                               2260                 :                :          *
                               2261                 :                :          * XXX This can only happen when the opclass modified the tuple, so
                               2262                 :                :          * the modified flag should be set.
                               2263                 :                :          */
  331                          2264   [ +  +  +  +  :         524767 :         if (has_nulls && !(bval->bv_hasnulls || bval->bv_allnulls))
                                              +  - ]
                               2265                 :                :         {
                               2266         [ -  + ]:              2 :             Assert(modified);
                               2267                 :              2 :             bval->bv_hasnulls = true;
                               2268                 :                :         }
                               2269                 :                :     }
                               2270                 :                : 
                               2271                 :                :     /*
                               2272                 :                :      * After updating summaries for all the keys, mark it as not empty.
                               2273                 :                :      *
                               2274                 :                :      * If we're actually changing the flag value (i.e. tuple started as
                               2275                 :                :      * empty), we should have modified the tuple. So we should not see an empty
                               2276                 :                :      * range that was not modified.
                               2277                 :                :      */
                               2278   [ +  +  -  + ]:         393651 :     Assert(!dtup->bt_empty_range || modified);
                               2279                 :         393651 :     dtup->bt_empty_range = false;
                               2280                 :                : 
 1118                          2281                 :         393651 :     return modified;
                               2282                 :                : }
                               2283                 :                : 
                               2284                 :                : static bool
                               2285                 :          94968 : check_null_keys(BrinValues *bval, ScanKey *nullkeys, int nnullkeys)
                               2286                 :                : {
                               2287                 :                :     int         keyno;
                               2288                 :                : 
                               2289                 :                :     /*
                               2290                 :                :      * First check if there are any IS [NOT] NULL scan keys, and if we're
                               2291                 :                :      * violating them.
                               2292                 :                :      */
                               2293         [ +  + ]:          95586 :     for (keyno = 0; keyno < nnullkeys; keyno++)
                               2294                 :                :     {
                               2295                 :           1116 :         ScanKey     key = nullkeys[keyno];
                               2296                 :                : 
                               2297         [ -  + ]:           1116 :         Assert(key->sk_attno == bval->bv_attno);
                               2298                 :                : 
                               2299                 :                :         /* Handle only IS NULL/IS NOT NULL tests */
                               2300         [ -  + ]:           1116 :         if (!(key->sk_flags & SK_ISNULL))
 1118 tomas.vondra@postgre     2301                 :UBC           0 :             continue;
                               2302                 :                : 
 1118 tomas.vondra@postgre     2303         [ +  + ]:CBC        1116 :         if (key->sk_flags & SK_SEARCHNULL)
                               2304                 :                :         {
                               2305                 :                :             /* IS NULL scan key, but range has no NULLs */
                               2306   [ +  +  +  + ]:            558 :             if (!bval->bv_allnulls && !bval->bv_hasnulls)
                               2307                 :            489 :                 return false;
                               2308                 :                :         }
                               2309         [ +  - ]:            558 :         else if (key->sk_flags & SK_SEARCHNOTNULL)
                               2310                 :                :         {
                               2311                 :                :             /*
                               2312                 :                :              * For IS NOT NULL, we can only skip ranges that are known to have
                               2313                 :                :              * only nulls.
                               2314                 :                :              */
                               2315         [ +  + ]:            558 :             if (bval->bv_allnulls)
                               2316                 :              9 :                 return false;
                               2317                 :                :         }
                               2318                 :                :         else
                               2319                 :                :         {
                               2320                 :                :             /*
                               2321                 :                :              * Neither IS NULL nor IS NOT NULL was used; assume all indexable
                               2322                 :                :              * operators are strict and thus return false with NULL value in
                               2323                 :                :              * the scan key.
                               2324                 :                :              */
 1118 tomas.vondra@postgre     2325                 :UBC           0 :             return false;
                               2326                 :                :         }
                               2327                 :                :     }
                               2328                 :                : 
 1118 tomas.vondra@postgre     2329                 :CBC       94470 :     return true;
                               2330                 :                : }
                               2331                 :                : 
                               2332                 :                : static void
  128 tomas.vondra@postgre     2333                 :GNC           2 : _brin_begin_parallel(BrinBuildState *buildstate, Relation heap, Relation index,
                               2334                 :                :                      bool isconcurrent, int request)
                               2335                 :                : {
                               2336                 :                :     ParallelContext *pcxt;
                               2337                 :                :     int         scantuplesortstates;
                               2338                 :                :     Snapshot    snapshot;
                               2339                 :                :     Size        estbrinshared;
                               2340                 :                :     Size        estsort;
                               2341                 :                :     BrinShared *brinshared;
                               2342                 :                :     Sharedsort *sharedsort;
                               2343                 :              2 :     BrinLeader *brinleader = (BrinLeader *) palloc0(sizeof(BrinLeader));
                               2344                 :                :     WalUsage   *walusage;
                               2345                 :                :     BufferUsage *bufferusage;
                               2346                 :              2 :     bool        leaderparticipates = true;
                               2347                 :                :     int         querylen;
                               2348                 :                : 
                               2349                 :                : #ifdef DISABLE_LEADER_PARTICIPATION
                               2350                 :                :     leaderparticipates = false;
                               2351                 :                : #endif
                               2352                 :                : 
                               2353                 :                :     /*
                               2354                 :                :      * Enter parallel mode, and create context for parallel build of brin
                               2355                 :                :      * index
                               2356                 :                :      */
                               2357                 :              2 :     EnterParallelMode();
                               2358         [ -  + ]:              2 :     Assert(request > 0);
                               2359                 :              2 :     pcxt = CreateParallelContext("postgres", "_brin_parallel_build_main",
                               2360                 :                :                                  request);
                               2361                 :                : 
                               2362         [ +  - ]:              2 :     scantuplesortstates = leaderparticipates ? request + 1 : request;
                               2363                 :                : 
                               2364                 :                :     /*
                               2365                 :                :      * Prepare for scan of the base relation.  In a normal index build, we use
                               2366                 :                :      * SnapshotAny because we must retrieve all tuples and do our own time
                               2367                 :                :      * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
                               2368                 :                :      * concurrent build, we take a regular MVCC snapshot and index whatever's
                               2369                 :                :      * live according to that.
                               2370                 :                :      */
                               2371         [ +  - ]:              2 :     if (!isconcurrent)
                               2372                 :              2 :         snapshot = SnapshotAny;
                               2373                 :                :     else
  128 tomas.vondra@postgre     2374                 :UNC           0 :         snapshot = RegisterSnapshot(GetTransactionSnapshot());
                               2375                 :                : 
                               2376                 :                :     /*
                               2377                 :                :      * Estimate size for our own PARALLEL_KEY_BRIN_SHARED workspace.
                               2378                 :                :      */
  128 tomas.vondra@postgre     2379                 :GNC           2 :     estbrinshared = _brin_parallel_estimate_shared(heap, snapshot);
                               2380                 :              2 :     shm_toc_estimate_chunk(&pcxt->estimator, estbrinshared);
                               2381                 :              2 :     estsort = tuplesort_estimate_shared(scantuplesortstates);
                               2382                 :              2 :     shm_toc_estimate_chunk(&pcxt->estimator, estsort);
                               2383                 :                : 
                               2384                 :              2 :     shm_toc_estimate_keys(&pcxt->estimator, 2);
                               2385                 :                : 
                               2386                 :                :     /*
                               2387                 :                :      * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE
                               2388                 :                :      * and PARALLEL_KEY_BUFFER_USAGE.
                               2389                 :                :      *
                               2390                 :                :      * If there are no extensions loaded that care, we could skip this.  We
                               2391                 :                :      * have no way of knowing whether anyone's looking at pgWalUsage or
                               2392                 :                :      * pgBufferUsage, so do it unconditionally.
                               2393                 :                :      */
                               2394                 :              2 :     shm_toc_estimate_chunk(&pcxt->estimator,
                               2395                 :                :                            mul_size(sizeof(WalUsage), pcxt->nworkers));
                               2396                 :              2 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
                               2397                 :              2 :     shm_toc_estimate_chunk(&pcxt->estimator,
                               2398                 :                :                            mul_size(sizeof(BufferUsage), pcxt->nworkers));
                               2399                 :              2 :     shm_toc_estimate_keys(&pcxt->estimator, 1);
                               2400                 :                : 
                               2401                 :                :     /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */
                               2402         [ +  - ]:              2 :     if (debug_query_string)
                               2403                 :                :     {
                               2404                 :              2 :         querylen = strlen(debug_query_string);
                               2405                 :              2 :         shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
                               2406                 :              2 :         shm_toc_estimate_keys(&pcxt->estimator, 1);
                               2407                 :                :     }
                               2408                 :                :     else
  128 tomas.vondra@postgre     2409                 :UNC           0 :         querylen = 0;           /* keep compiler quiet */
                               2410                 :                : 
                               2411                 :                :     /* Everyone's had a chance to ask for space, so now create the DSM */
  128 tomas.vondra@postgre     2412                 :GNC           2 :     InitializeParallelDSM(pcxt);
                               2413                 :                : 
                               2414                 :                :     /* If no DSM segment was available, back out (do serial build) */
                               2415         [ -  + ]:              2 :     if (pcxt->seg == NULL)
                               2416                 :                :     {
  128 tomas.vondra@postgre     2417   [ #  #  #  # ]:UNC           0 :         if (IsMVCCSnapshot(snapshot))
                               2418                 :              0 :             UnregisterSnapshot(snapshot);
                               2419                 :              0 :         DestroyParallelContext(pcxt);
                               2420                 :              0 :         ExitParallelMode();
                               2421                 :              0 :         return;
                               2422                 :                :     }
                               2423                 :                : 
                               2424                 :                :     /* Store shared build state, for which we reserved space */
  128 tomas.vondra@postgre     2425                 :GNC           2 :     brinshared = (BrinShared *) shm_toc_allocate(pcxt->toc, estbrinshared);
                               2426                 :                :     /* Initialize immutable state */
                               2427                 :              2 :     brinshared->heaprelid = RelationGetRelid(heap);
                               2428                 :              2 :     brinshared->indexrelid = RelationGetRelid(index);
                               2429                 :              2 :     brinshared->isconcurrent = isconcurrent;
                               2430                 :              2 :     brinshared->scantuplesortstates = scantuplesortstates;
                               2431                 :              2 :     brinshared->pagesPerRange = buildstate->bs_pagesPerRange;
                               2432                 :              2 :     ConditionVariableInit(&brinshared->workersdonecv);
                               2433                 :              2 :     SpinLockInit(&brinshared->mutex);
                               2434                 :                : 
                               2435                 :                :     /* Initialize mutable state */
                               2436                 :              2 :     brinshared->nparticipantsdone = 0;
                               2437                 :              2 :     brinshared->reltuples = 0.0;
                               2438                 :              2 :     brinshared->indtuples = 0.0;
                               2439                 :                : 
                               2440                 :              2 :     table_parallelscan_initialize(heap,
                               2441                 :                :                                   ParallelTableScanFromBrinShared(brinshared),
                               2442                 :                :                                   snapshot);
                               2443                 :                : 
                               2444                 :                :     /*
                               2445                 :                :      * Store shared tuplesort-private state, for which we reserved space.
                               2446                 :                :      * Then, initialize opaque state using tuplesort routine.
                               2447                 :                :      */
                               2448                 :              2 :     sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort);
                               2449                 :              2 :     tuplesort_initialize_shared(sharedsort, scantuplesortstates,
                               2450                 :                :                                 pcxt->seg);
                               2451                 :                : 
                               2452                 :                :     /*
                               2453                 :                :      * Insert the shared build state and the shared tuplesort state into the
                               2454                 :                :      * TOC under their respective keys.
                               2455                 :                :      */
                               2456                 :              2 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_BRIN_SHARED, brinshared);
                               2457                 :              2 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort);
                               2458                 :                : 
                               2459                 :                :     /* Store query string for workers */
                               2460         [ +  - ]:              2 :     if (debug_query_string)
                               2461                 :                :     {
                               2462                 :                :         char       *sharedquery;
                               2463                 :                : 
                               2464                 :              2 :         sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
                               2465                 :              2 :         memcpy(sharedquery, debug_query_string, querylen + 1);
                               2466                 :              2 :         shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery);
                               2467                 :                :     }
                               2468                 :                : 
                               2469                 :                :     /*
                               2470                 :                :      * Allocate space for each worker's WalUsage and BufferUsage; no need to
                               2471                 :                :      * initialize.
                               2472                 :                :      */
                               2473                 :              2 :     walusage = shm_toc_allocate(pcxt->toc,
                               2474                 :              2 :                                 mul_size(sizeof(WalUsage), pcxt->nworkers));
                               2475                 :              2 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage);
                               2476                 :              2 :     bufferusage = shm_toc_allocate(pcxt->toc,
                               2477                 :              2 :                                    mul_size(sizeof(BufferUsage), pcxt->nworkers));
                               2478                 :              2 :     shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage);
                               2479                 :                : 
                               2480                 :                :     /* Launch workers, saving status for leader/caller */
                               2481                 :              2 :     LaunchParallelWorkers(pcxt);
                               2482                 :              2 :     brinleader->pcxt = pcxt;
                               2483                 :              2 :     brinleader->nparticipanttuplesorts = pcxt->nworkers_launched;
                               2484         [ +  - ]:              2 :     if (leaderparticipates)
                               2485                 :              2 :         brinleader->nparticipanttuplesorts++;
                               2486                 :              2 :     brinleader->brinshared = brinshared;
                               2487                 :              2 :     brinleader->sharedsort = sharedsort;
                               2488                 :              2 :     brinleader->snapshot = snapshot;
                               2489                 :              2 :     brinleader->walusage = walusage;
                               2490                 :              2 :     brinleader->bufferusage = bufferusage;
                               2491                 :                : 
                               2492                 :                :     /* If no workers were successfully launched, back out (do serial build) */
                               2493         [ +  + ]:              2 :     if (pcxt->nworkers_launched == 0)
                               2494                 :                :     {
                               2495                 :              1 :         _brin_end_parallel(brinleader, NULL);
                               2496                 :              1 :         return;
                               2497                 :                :     }
                               2498                 :                : 
                               2499                 :                :     /* Save leader state now that it's clear build will be parallel */
                               2500                 :              1 :     buildstate->bs_leader = brinleader;
                               2501                 :                : 
                               2502                 :                :     /* Join heap scan ourselves */
                               2503         [ +  - ]:              1 :     if (leaderparticipates)
                               2504                 :              1 :         _brin_leader_participate_as_worker(buildstate, heap, index);
                               2505                 :                : 
                               2506                 :                :     /*
                               2507                 :                :      * Caller needs to wait for all launched workers when we return.  Make
                               2508                 :                :      * sure that the failure-to-start case will not hang forever.
                               2509                 :                :      */
                               2510                 :              1 :     WaitForParallelWorkersToAttach(pcxt);
                               2511                 :                : }
                               2512                 :                : 
                               2513                 :                : /*
                               2514                 :                :  * Shut down workers, destroy parallel context, and end parallel mode.
                               2515                 :                :  */
                               2516                 :                : static void
                               2517                 :              2 : _brin_end_parallel(BrinLeader *brinleader, BrinBuildState *state)
                               2518                 :                : {
                               2519                 :                :     int         i;
                               2520                 :                :     BrinTuple  *btup;
                               2521                 :              2 :     BrinMemTuple *memtuple = NULL;
                               2522                 :                :     Size        tuplen;
                               2523                 :              2 :     BrinShared *brinshared = brinleader->brinshared;
                               2524                 :              2 :     BlockNumber prevblkno = InvalidBlockNumber;
                               2525                 :                :     MemoryContext rangeCxt,
                               2526                 :                :                 oldCxt;
                               2527                 :                : 
                               2528                 :                :     /* Shutdown worker processes */
                               2529                 :              2 :     WaitForParallelWorkersToFinish(brinleader->pcxt);
                               2530                 :                : 
                               2531                 :                :     /*
                               2532                 :                :      * If we didn't actually launch workers, we still have to make sure to
                               2533                 :                :      * exit parallel mode.
                               2534                 :                :      */
                               2535         [ +  + ]:              2 :     if (!state)
                               2536                 :              1 :         goto cleanup;
                               2537                 :                : 
                               2538                 :                :     /* copy the data into leader state (we have to wait for the workers) */
                               2539                 :              1 :     state->bs_reltuples = brinshared->reltuples;
                               2540                 :              1 :     state->bs_numtuples = brinshared->indtuples;
                               2541                 :                : 
                               2542                 :                :     /* do the actual sort in the leader */
  106                          2543                 :              1 :     tuplesort_performsort(state->bs_sortstate);
                               2544                 :                : 
                               2545                 :                :     /*
                               2546                 :                :      * Initialize BrinMemTuple we'll use to union summaries from workers (in
                               2547                 :                :      * case they happened to produce parts of the same page range).
                               2548                 :                :      */
  128                          2549                 :              1 :     memtuple = brin_new_memtuple(state->bs_bdesc);
                               2550                 :                : 
                               2551                 :                :     /*
                               2552                 :                :      * Create a memory context we'll reset to combine results for a single
                               2553                 :                :      * page range (received from the workers). We don't expect a huge number of
                               2554                 :                :      * overlaps under regular circumstances (because for large tables the
                               2555                 :                :      * chunk size is likely larger than the BRIN page range), but it can
                               2556                 :                :      * happen, and the union functions may do all kinds of stuff. So we better
                               2557                 :                :      * reset the context once in a while.
                               2558                 :                :      */
                               2559                 :              1 :     rangeCxt = AllocSetContextCreate(CurrentMemoryContext,
                               2560                 :                :                                      "brin union",
                               2561                 :                :                                      ALLOCSET_DEFAULT_SIZES);
                               2562                 :              1 :     oldCxt = MemoryContextSwitchTo(rangeCxt);
                               2563                 :                : 
                               2564                 :                :     /*
                               2565                 :                :      * Read the BRIN tuples from the shared tuplesort, sorted by block number.
                               2566                 :                :      * That probably gives us an index that is cheaper to scan, thanks to
                               2567                 :                :      * mostly getting data from the same index page as before.
                               2568                 :                :      */
  106                          2569         [ +  + ]:             25 :     while ((btup = tuplesort_getbrintuple(state->bs_sortstate, &tuplen, true)) != NULL)
                               2570                 :                :     {
                               2571                 :                :         /* Ranges should be multiples of pages_per_range for the index. */
  128                          2572         [ -  + ]:             24 :         Assert(btup->bt_blkno % brinshared->pagesPerRange == 0);
                               2573                 :                : 
                               2574                 :                :         /*
                               2575                 :                :          * Do we need to union summaries for the same page range?
                               2576                 :                :          *
                               2577                 :                :          * If this is the first brin tuple we read, then just deform it into
                               2578                 :                :          * the memtuple, and continue with the next one from tuplesort. We
                               2579                 :                :          * however may need to insert empty summaries into the index.
                               2580                 :                :          *
                               2581                 :                :          * If it's the same block as the last we saw, we simply union the brin
                               2582                 :                :          * tuple into it, and we're done - we don't even need to insert empty
                               2583                 :                :          * ranges, because that was done earlier when we saw the first brin
                               2584                 :                :          * tuple (for this range).
                               2585                 :                :          *
                               2586                 :                :          * Finally, if it's not the first brin tuple, and it's not the same
                               2587                 :                :          * page range, we need to do the insert and then deform the tuple into
                               2588                 :                :          * the memtuple. Then we'll insert empty ranges before the new brin
                               2589                 :                :          * tuple, if needed.
                               2590                 :                :          */
                               2591         [ +  + ]:             24 :         if (prevblkno == InvalidBlockNumber)
                               2592                 :                :         {
                               2593                 :                :             /* First brin tuple, just deform into memtuple. */
                               2594                 :              1 :             memtuple = brin_deform_tuple(state->bs_bdesc, btup, memtuple);
                               2595                 :                : 
                               2596                 :                :             /* continue to insert empty ranges before this block */
                               2597                 :                :         }
                               2598         [ +  + ]:             23 :         else if (memtuple->bt_blkno == btup->bt_blkno)
                               2599                 :                :         {
                               2600                 :                :             /*
                               2601                 :                :              * Not the first brin tuple, but same page range as the previous
                               2602                 :                :              * one, so we can merge it into the memtuple.
                               2603                 :                :              */
                               2604                 :              8 :             union_tuples(state->bs_bdesc, memtuple, btup);
                               2605                 :              8 :             continue;
                               2606                 :                :         }
                               2607                 :                :         else
                               2608                 :                :         {
                               2609                 :                :             BrinTuple  *tmp;
                               2610                 :                :             Size        len;
                               2611                 :                : 
                               2612                 :                :             /*
                               2613                 :                :              * We got brin tuple for a different page range, so form a brin
                               2614                 :                :              * tuple from the memtuple, insert it, and re-init the memtuple
                               2615                 :                :              * from the new brin tuple.
                               2616                 :                :              */
                               2617                 :             15 :             tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno,
                               2618                 :                :                                   memtuple, &len);
                               2619                 :                : 
                               2620                 :             15 :             brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
                               2621                 :                :                           &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len);
                               2622                 :                : 
                               2623                 :                :             /*
                               2624                 :                :              * Reset the per-output-range context. This frees all the memory
                               2625                 :                :              * possibly allocated by the union functions, and also the BRIN
                               2626                 :                :              * tuple we just formed and inserted.
                               2627                 :                :              */
                               2628                 :             15 :             MemoryContextReset(rangeCxt);
                               2629                 :                : 
                               2630                 :             15 :             memtuple = brin_deform_tuple(state->bs_bdesc, btup, memtuple);
                               2631                 :                : 
                               2632                 :                :             /* continue to insert empty ranges before this block */
                               2633                 :                :         }
                               2634                 :                : 
                               2635                 :                :         /* Fill empty ranges for all ranges missing in the tuplesort. */
                               2636                 :             16 :         brin_fill_empty_ranges(state, prevblkno, btup->bt_blkno);
                               2637                 :                : 
                               2638                 :             16 :         prevblkno = btup->bt_blkno;
                               2639                 :                :     }
                               2640                 :                : 
  106                          2641                 :              1 :     tuplesort_end(state->bs_sortstate);
                               2642                 :                : 
                               2643                 :                :     /* Fill the BRIN tuple for the last page range with data. */
  128                          2644         [ +  - ]:              1 :     if (prevblkno != InvalidBlockNumber)
                               2645                 :                :     {
                               2646                 :                :         BrinTuple  *tmp;
                               2647                 :                :         Size        len;
                               2648                 :                : 
                               2649                 :              1 :         tmp = brin_form_tuple(state->bs_bdesc, memtuple->bt_blkno,
                               2650                 :                :                               memtuple, &len);
                               2651                 :                : 
                               2652                 :              1 :         brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
                               2653                 :                :                       &state->bs_currentInsertBuf, tmp->bt_blkno, tmp, len);
                               2654                 :                : 
                               2655                 :              1 :         pfree(tmp);
                               2656                 :                :     }
                               2657                 :                : 
                               2658                 :                :     /* Fill empty ranges at the end, for all ranges missing in the tuplesort. */
                               2659                 :              1 :     brin_fill_empty_ranges(state, prevblkno, state->bs_maxRangeStart);
                               2660                 :                : 
                               2661                 :                :     /*
                               2662                 :                :      * Switch back to the original memory context, and destroy the one we
                               2663                 :                :      * created to isolate the union_tuple calls.
                               2664                 :                :      */
                               2665                 :              1 :     MemoryContextSwitchTo(oldCxt);
                               2666                 :              1 :     MemoryContextDelete(rangeCxt);
                               2667                 :                : 
                               2668                 :                :     /*
                               2669                 :                :      * Next, accumulate WAL and buffer usage.  (This must wait for the workers to finish,
                               2670                 :                :      * or we might get incomplete data.)
                               2671                 :                :      */
                               2672         [ +  + ]:              4 :     for (i = 0; i < brinleader->pcxt->nworkers_launched; i++)
                               2673                 :              3 :         InstrAccumParallelQuery(&brinleader->bufferusage[i], &brinleader->walusage[i]);
                               2674                 :                : 
                               2675                 :              1 : cleanup:
                               2676                 :                : 
                               2677                 :                :     /* Free last reference to MVCC snapshot, if one was used */
                               2678   [ +  -  -  + ]:              2 :     if (IsMVCCSnapshot(brinleader->snapshot))
  128 tomas.vondra@postgre     2679                 :UNC           0 :         UnregisterSnapshot(brinleader->snapshot);
  128 tomas.vondra@postgre     2680                 :GNC           2 :     DestroyParallelContext(brinleader->pcxt);
                               2681                 :              2 :     ExitParallelMode();
                               2682                 :              2 : }
                               2683                 :                : 
                               2684                 :                : /*
                               2685                 :                :  * Returns size of shared memory required to store state for a parallel
                               2686                 :                :  * brin index build based on the snapshot its parallel scan will use.
                               2687                 :                :  */
                               2688                 :                : static Size
                               2689                 :              2 : _brin_parallel_estimate_shared(Relation heap, Snapshot snapshot)
                               2690                 :                : {
                               2691                 :                :     /* c.f. shm_toc_allocate as to why BUFFERALIGN is used */
                               2692                 :              2 :     return add_size(BUFFERALIGN(sizeof(BrinShared)),
                               2693                 :                :                     table_parallelscan_estimate(heap, snapshot));
                               2694                 :                : }
                               2695                 :                : 
                               2696                 :                : /*
                               2697                 :                :  * Within leader, participate as a parallel worker.
                               2698                 :                :  */
                               2699                 :                : static void
                               2700                 :              1 : _brin_leader_participate_as_worker(BrinBuildState *buildstate, Relation heap, Relation index)
                               2701                 :                : {
                               2702                 :              1 :     BrinLeader *brinleader = buildstate->bs_leader;
                               2703                 :                :     int         sortmem;
                               2704                 :                : 
                               2705                 :                :     /*
                               2706                 :                :      * Might as well use a reliable figure when doling out maintenance_work_mem
                               2707                 :                :      * (when fewer workers were launched than requested, this will be
                               2708                 :                :      * somewhat higher than it is for other workers).
                               2709                 :                :      */
                               2710                 :              1 :     sortmem = maintenance_work_mem / brinleader->nparticipanttuplesorts;
                               2711                 :                : 
                               2712                 :                :     /* Perform work common to all participants */
  106                          2713                 :              1 :     _brin_parallel_scan_and_build(buildstate, brinleader->brinshared,
                               2714                 :                :                                   brinleader->sharedsort, heap, index, sortmem, true);
  128                          2715                 :              1 : }
                               2716                 :                : 
                               2717                 :                : /*
                               2718                 :                :  * Perform a worker's portion of a parallel sort: generate a tuplesort for
                               2719                 :                :  * the worker's portion of the table.
                               2720                 :                :  *
                               2721                 :                :  * sortmem is the amount of working memory to use within each worker,
                               2722                 :                :  * expressed in KBs.  NOTE(review): "progress" is not referenced anywhere in
                               2723                 :                :  * the function body -- confirm whether that is intentional.
                               2724                 :                :  *
                               2725                 :                :  * When this returns, workers are done, and need only release resources.
                               2726                 :                :  */
                               2727                 :                : static void
  106                          2728                 :              4 : _brin_parallel_scan_and_build(BrinBuildState *state,
                               2729                 :                :                               BrinShared *brinshared, Sharedsort *sharedsort,
                               2730                 :                :                               Relation heap, Relation index,
                               2731                 :                :                               int sortmem, bool progress)
                               2732                 :                : {
                               2733                 :                :     SortCoordinate coordinate;
                               2734                 :                :     TableScanDesc scan;
                               2735                 :                :     double      reltuples;
                               2736                 :                :     IndexInfo  *indexInfo;
                               2737                 :                : 
                               2738                 :                :     /* Initialize local tuplesort coordination state */
  128                          2739                 :              4 :     coordinate = palloc0(sizeof(SortCoordinateData));
                               2740                 :              4 :     coordinate->isWorker = true;
                               2741                 :              4 :     coordinate->nParticipants = -1;
                               2742                 :              4 :     coordinate->sharedsort = sharedsort;
                               2743                 :                : 
                               2744                 :                :     /* Begin "partial" tuplesort */
  106                          2745                 :              4 :     state->bs_sortstate = tuplesort_begin_index_brin(sortmem, coordinate,
                               2746                 :                :                                                      TUPLESORT_NONE);
                               2747                 :                : 
                               2748                 :                :     /* Join parallel scan */
  128                          2749                 :              4 :     indexInfo = BuildIndexInfo(index);
                               2750                 :              4 :     indexInfo->ii_Concurrent = brinshared->isconcurrent;
                               2751                 :                : 
                               2752                 :              4 :     scan = table_beginscan_parallel(heap,
                               2753                 :                :                                     ParallelTableScanFromBrinShared(brinshared));
                               2754                 :                : 
                               2755                 :              4 :     reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
                               2756                 :                :                                        brinbuildCallbackParallel, state, scan);
                               2757                 :                : 
                               2758                 :                :     /* insert the last item */
                               2759                 :              4 :     form_and_spill_tuple(state);
                               2760                 :                : 
                               2761                 :                :     /* sort the BRIN ranges built by this worker */
  106                          2762                 :              4 :     tuplesort_performsort(state->bs_sortstate);
                               2763                 :                : 
  128                          2764                 :              4 :     state->bs_reltuples += reltuples;
                               2765                 :                : 
                               2766                 :                :     /*
                               2767                 :                :      * Done.  Record ambuild statistics.
                               2768                 :                :      */
                               2769         [ -  + ]:              4 :     SpinLockAcquire(&brinshared->mutex);
                               2770                 :              4 :     brinshared->nparticipantsdone++;
                               2771                 :              4 :     brinshared->reltuples += state->bs_reltuples;
                               2772                 :              4 :     brinshared->indtuples += state->bs_numtuples;
                               2773                 :              4 :     SpinLockRelease(&brinshared->mutex);
                               2774                 :                : 
                               2775                 :                :     /* Notify leader */
                               2776                 :              4 :     ConditionVariableSignal(&brinshared->workersdonecv);
                               2777                 :                : 
  106                          2778                 :              4 :     tuplesort_end(state->bs_sortstate);
  128                          2779                 :              4 : }
                               2780                 :                : 
                               2781                 :                : /*
                               2782                 :                :  * Perform work within a launched parallel process.
                               2783                 :                :  */
                               2784                 :                : void
                               2785                 :              3 : _brin_parallel_build_main(dsm_segment *seg, shm_toc *toc)
                               2786                 :                : {
                               2787                 :                :     char       *sharedquery;
                               2788                 :                :     BrinShared *brinshared;
                               2789                 :                :     Sharedsort *sharedsort;
                               2790                 :                :     BrinBuildState *buildstate;
                               2791                 :                :     Relation    heapRel;
                               2792                 :                :     Relation    indexRel;
                               2793                 :                :     LOCKMODE    heapLockmode;
                               2794                 :                :     LOCKMODE    indexLockmode;
                               2795                 :                :     WalUsage   *walusage;
                               2796                 :                :     BufferUsage *bufferusage;
                               2797                 :                :     int         sortmem;
                               2798                 :                : 
                               2799                 :                :     /*
                               2800                 :                :      * The only possible status flag that can be set for a parallel worker is
                               2801                 :                :      * PROC_IN_SAFE_IC.
                               2802                 :                :      */
                               2803   [ -  +  -  - ]:              3 :     Assert((MyProc->statusFlags == 0) ||
                               2804                 :                :            (MyProc->statusFlags == PROC_IN_SAFE_IC));
                               2805                 :                : 
                               2806                 :                :     /* Set debug_query_string for individual workers first */
                               2807                 :              3 :     sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true);
                               2808                 :              3 :     debug_query_string = sharedquery;
                               2809                 :                : 
                               2810                 :                :     /* Report the query string from leader */
                               2811                 :              3 :     pgstat_report_activity(STATE_RUNNING, debug_query_string);
                               2812                 :                : 
                               2813                 :                :     /* Look up brin shared state */
                               2814                 :              3 :     brinshared = shm_toc_lookup(toc, PARALLEL_KEY_BRIN_SHARED, false);
                               2815                 :                : 
                               2816                 :                :     /* Open relations using lock modes known to be obtained by index.c */
                               2817         [ +  - ]:              3 :     if (!brinshared->isconcurrent)
                               2818                 :                :     {
                               2819                 :              3 :         heapLockmode = ShareLock;
                               2820                 :              3 :         indexLockmode = AccessExclusiveLock;
                               2821                 :                :     }
                               2822                 :                :     else
                               2823                 :                :     {
  128 tomas.vondra@postgre     2824                 :UNC           0 :         heapLockmode = ShareUpdateExclusiveLock;
                               2825                 :              0 :         indexLockmode = RowExclusiveLock;
                               2826                 :                :     }
                               2827                 :                : 
                               2828                 :                :     /* Open relations within worker */
  128 tomas.vondra@postgre     2829                 :GNC           3 :     heapRel = table_open(brinshared->heaprelid, heapLockmode);
                               2830                 :              3 :     indexRel = index_open(brinshared->indexrelid, indexLockmode);
                               2831                 :                : 
                               2832                 :              3 :     buildstate = initialize_brin_buildstate(indexRel, NULL,
                               2833                 :                :                                             brinshared->pagesPerRange,
                               2834                 :                :                                             InvalidBlockNumber);
                               2835                 :                : 
                               2836                 :                :     /* Look up shared state private to tuplesort.c */
                               2837                 :              3 :     sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false);
                               2838                 :              3 :     tuplesort_attach_shared(sharedsort, seg);
                               2839                 :                : 
                               2840                 :                :     /* Prepare to track buffer usage during parallel execution */
                               2841                 :              3 :     InstrStartParallelQuery();
                               2842                 :                : 
                               2843                 :                :     /*
                               2844                 :                :      * Might as well use a reliable figure when doling out maintenance_work_mem
                               2845                 :                :      * (when fewer workers were launched than requested, this will be
                               2846                 :                :      * somewhat higher than it is for other workers).
                               2847                 :                :      */
                               2848                 :              3 :     sortmem = maintenance_work_mem / brinshared->scantuplesortstates;
                               2849                 :                : 
  106                          2850                 :              3 :     _brin_parallel_scan_and_build(buildstate, brinshared, sharedsort,
                               2851                 :                :                                   heapRel, indexRel, sortmem, false);
                               2852                 :                : 
                               2853                 :                :     /* Report WAL/buffer usage during parallel execution */
  128                          2854                 :              3 :     bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false);
                               2855                 :              3 :     walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false);
                               2856                 :              3 :     InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber],
                               2857                 :              3 :                           &walusage[ParallelWorkerNumber]);
                               2858                 :                : 
                               2859                 :              3 :     index_close(indexRel, indexLockmode);
                               2860                 :              3 :     table_close(heapRel, heapLockmode);
                               2861                 :              3 : }
                               2862                 :                : 
                               2863                 :                : /*
                               2864                 :                :  * brin_build_empty_tuple
                               2865                 :                :  *      Maybe initialize a BRIN tuple representing an empty range.
                               2866                 :                :  *
                               2867                 :                :  * Makes state->bs_emptyTuple a BRIN tuple representing an empty page range
                               2868                 :                :  * starting at the specified block number (nothing is returned). The empty
                               2869                 :                :  * tuple is initialized only once, when it's needed for the first time, stored
                               2870                 :                :  * in the memory context bs_context to ensure proper life span, and reused on
                               2871                 :                :  * following calls. All empty tuples are exactly the same except for the
                               2872                 :                :  * bt_blkno field, which is set to the value of the blkno parameter.
                               2873                 :                :  */
                               2874                 :                : static void
                               2875                 :              4 : brin_build_empty_tuple(BrinBuildState *state, BlockNumber blkno)
                               2876                 :                : {
                               2877                 :                :     /* First time an empty tuple is requested? If yes, initialize it. */
                               2878         [ +  + ]:              4 :     if (state->bs_emptyTuple == NULL)
                               2879                 :                :     {
                               2880                 :                :         MemoryContext oldcxt;
                               2881                 :              1 :         BrinMemTuple *dtuple = brin_new_memtuple(state->bs_bdesc);
                               2882                 :                : 
                               2883                 :                :         /* Allocate the tuple in context for the whole index build. */
                               2884                 :              1 :         oldcxt = MemoryContextSwitchTo(state->bs_context);
                               2885                 :                : 
                               2886                 :              1 :         state->bs_emptyTuple = brin_form_tuple(state->bs_bdesc, blkno, dtuple,
                               2887                 :                :                                                &state->bs_emptyTupleLen);
                               2888                 :                : 
                               2889                 :              1 :         MemoryContextSwitchTo(oldcxt);
                               2890                 :                :     }
                               2891                 :                :     else
                               2892                 :                :     {
                               2893                 :                :         /* If we already have an empty tuple, just update the block. */
                               2894                 :              3 :         state->bs_emptyTuple->bt_blkno = blkno;
                               2895                 :                :     }
                               2896                 :              4 : }
                               2897                 :                : 
                               2898                 :                : /*
                               2899                 :                :  * brin_fill_empty_ranges
                               2900                 :                :  *      Add BRIN index tuples representing empty page ranges.
                               2901                 :                :  *
                               2902                 :                :  * prevRange/nextRange determine for which page ranges to add empty summaries.
                               2903                 :                :  * Both boundaries are exclusive, i.e. only ranges starting at blkno for which
                               2904                 :                :  * (prevRange < blkno < nextRange) will be added to the index.
                               2905                 :                :  *
                               2906                 :                :  * If prevRange is InvalidBlockNumber, this means there was no previous page
                               2907                 :                :  * range (i.e. the first empty range to add is for blkno=0).
                               2908                 :                :  *
                               2909                 :                :  * The empty tuple is built only once, and then reused for all future calls.
                               2910                 :                :  */
                               2911                 :                : static void
                               2912                 :            182 : brin_fill_empty_ranges(BrinBuildState *state,
                               2913                 :                :                        BlockNumber prevRange, BlockNumber nextRange)
                               2914                 :                : {
                               2915                 :                :     BlockNumber blkno;
                               2916                 :                : 
                               2917                 :                :     /*
                               2918                 :                :      * If we already summarized some ranges, we need to start with the next
                               2919                 :                :      * one. Otherwise start from the first range of the table.
                               2920                 :                :      */
                               2921         [ +  + ]:            182 :     blkno = (prevRange == InvalidBlockNumber) ? 0 : (prevRange + state->bs_pagesPerRange);
                               2922                 :                : 
                               2923                 :                :     /* Generate empty ranges until we hit the next non-empty range. */
                               2924         [ +  + ]:            186 :     while (blkno < nextRange)
                               2925                 :                :     {
                               2926                 :                :         /* Build the empty tuple on first use, else just point it at blkno. */
                               2927                 :              4 :         brin_build_empty_tuple(state, blkno);
                               2928                 :                : 
                               2929                 :              4 :         brin_doinsert(state->bs_irel, state->bs_pagesPerRange, state->bs_rmAccess,
                               2930                 :                :                       &state->bs_currentInsertBuf,
                               2931                 :                :                       blkno, state->bs_emptyTuple, state->bs_emptyTupleLen);
                               2932                 :                : 
                               2933                 :                :         /* try next page range */
                               2934                 :              4 :         blkno += state->bs_pagesPerRange;
                               2935                 :                :     }
                               2936                 :            182 : }
        

Generated by: LCOV version 2.1-beta2-3-g6141622