LCOV - differential code coverage report
Current view: top level - src/backend/access/tablesample - bernoulli.c (source / functions) Coverage Total Hit CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 100.0 % 57 57 57
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 5 5 5
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 100.0 % 57 57 57
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 100.0 % 5 5 5

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * bernoulli.c
                                  4                 :  *    support routines for BERNOULLI tablesample method
                                  5                 :  *
                                  6                 :  * To ensure repeatability of samples, it is necessary that selection of a
                                  7                 :  * given tuple be history-independent; otherwise syncscanning would break
                                  8                 :  * repeatability, to say nothing of logically-irrelevant maintenance such
                                  9                 :  * as physical extension or shortening of the relation.
                                 10                 :  *
                                 11                 :  * To achieve that, we proceed by hashing each candidate TID together with
                                 12                 :  * the active seed, and then selecting it if the hash is less than the
                                 13                 :  * cutoff value computed from the selection probability by BeginSampleScan.
                                 14                 :  *
                                 15                 :  *
                                 16                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 17                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 18                 :  *
                                 19                 :  * IDENTIFICATION
                                 20                 :  *    src/backend/access/tablesample/bernoulli.c
                                 21                 :  *
                                 22                 :  *-------------------------------------------------------------------------
                                 23                 :  */
                                 24                 : 
                                 25                 : #include "postgres.h"
                                 26                 : 
                                 27                 : #include <math.h>
                                 28                 : 
                                 29                 : #include "access/tsmapi.h"
                                 30                 : #include "catalog/pg_type.h"
                                 31                 : #include "common/hashfn.h"
                                 32                 : #include "optimizer/optimizer.h"
                                 33                 : #include "utils/builtins.h"
                                 34                 : 
                                 35                 : 
                                 36                 : /* Private state */
                                 37                 : typedef struct
                                 38                 : {
                                 39                 :     uint64      cutoff;         /* select tuples with hash less than this */
                                 40                 :     uint32      seed;           /* random seed */
                                 41                 :     OffsetNumber lt;            /* last tuple returned from current block */
                                 42                 : } BernoulliSamplerData;
                                 43                 : 
                                 44                 : 
                                 45                 : static void bernoulli_samplescangetsamplesize(PlannerInfo *root,
                                 46                 :                                               RelOptInfo *baserel,
                                 47                 :                                               List *paramexprs,
                                 48                 :                                               BlockNumber *pages,
                                 49                 :                                               double *tuples);
                                 50                 : static void bernoulli_initsamplescan(SampleScanState *node,
                                 51                 :                                      int eflags);
                                 52                 : static void bernoulli_beginsamplescan(SampleScanState *node,
                                 53                 :                                       Datum *params,
                                 54                 :                                       int nparams,
                                 55                 :                                       uint32 seed);
                                 56                 : static OffsetNumber bernoulli_nextsampletuple(SampleScanState *node,
                                 57                 :                                               BlockNumber blockno,
                                 58                 :                                               OffsetNumber maxoffset);
                                 59                 : 
                                 60                 : 
                                 61                 : /*
                                 62                 :  * Create a TsmRoutine descriptor for the BERNOULLI method.
                                 63                 :  */
                                 64                 : Datum
 2815 tgl                        65 CBC         228 : tsm_bernoulli_handler(PG_FUNCTION_ARGS)
                                 66                 : {
                                 67             228 :     TsmRoutine *tsm = makeNode(TsmRoutine);
                                 68                 : 
                                 69             228 :     tsm->parameterTypes = list_make1_oid(FLOAT4OID);
                                 70             228 :     tsm->repeatable_across_queries = true;
                                 71             228 :     tsm->repeatable_across_scans = true;
                                 72             228 :     tsm->SampleScanGetSampleSize = bernoulli_samplescangetsamplesize;
                                 73             228 :     tsm->InitSampleScan = bernoulli_initsamplescan;
                                 74             228 :     tsm->BeginSampleScan = bernoulli_beginsamplescan;
                                 75             228 :     tsm->NextSampleBlock = NULL;
                                 76             228 :     tsm->NextSampleTuple = bernoulli_nextsampletuple;
                                 77             228 :     tsm->EndSampleScan = NULL;
                                 78                 : 
                                 79             228 :     PG_RETURN_POINTER(tsm);
                                 80                 : }
                                 81                 : 
                                 82                 : /*
                                 83                 :  * Sample size estimation.
                                 84                 :  */
                                 85                 : static void
                                 86              60 : bernoulli_samplescangetsamplesize(PlannerInfo *root,
                                 87                 :                                   RelOptInfo *baserel,
                                 88                 :                                   List *paramexprs,
                                 89                 :                                   BlockNumber *pages,
                                 90                 :                                   double *tuples)
                                 91                 : {
                                 92                 :     Node       *pctnode;
                                 93                 :     float4      samplefract;
                                 94                 : 
                                 95                 :     /* Try to extract an estimate for the sample percentage */
                                 96              60 :     pctnode = (Node *) linitial(paramexprs);
                                 97              60 :     pctnode = estimate_expression_value(root, pctnode);
                                 98                 : 
                                 99              60 :     if (IsA(pctnode, Const) &&
                                100              51 :         !((Const *) pctnode)->constisnull)
                                101                 :     {
                                102              51 :         samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
                                103              51 :         if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
                                104              45 :             samplefract /= 100.0f;
                                105                 :         else
                                106                 :         {
                                107                 :             /* Default samplefract if the value is bogus */
                                108               6 :             samplefract = 0.1f;
                                109                 :         }
                                110                 :     }
                                111                 :     else
                                112                 :     {
                                113                 :         /* Default samplefract if we didn't obtain a non-null Const */
                                114               9 :         samplefract = 0.1f;
                                115                 :     }
                                116                 : 
                                117                 :     /* We'll visit all pages of the baserel */
                                118              60 :     *pages = baserel->pages;
                                119                 : 
                                120              60 :     *tuples = clamp_row_est(baserel->tuples * samplefract);
                                121              60 : }
                                122                 : 
                                123                 : /*
                                124                 :  * Initialize during executor setup.
                                125                 :  */
                                126                 : static void
                                127              60 : bernoulli_initsamplescan(SampleScanState *node, int eflags)
                                128                 : {
                                129              60 :     node->tsm_state = palloc0(sizeof(BernoulliSamplerData));
 2886 simon                     130              60 : }
                                131                 : 
                                132                 : /*
                                133                 :  * Examine parameters and prepare for a sample scan.
                                134                 :  */
                                135                 : static void
 2815 tgl                       136              45 : bernoulli_beginsamplescan(SampleScanState *node,
                                137                 :                           Datum *params,
                                138                 :                           int nparams,
                                139                 :                           uint32 seed)
                                140                 : {
                                141              45 :     BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
                                142              45 :     double      percent = DatumGetFloat4(params[0]);
                                143                 :     double      dcutoff;
                                144                 : 
                                145              45 :     if (percent < 0 || percent > 100 || isnan(percent))
                                146               6 :         ereport(ERROR,
                                147                 :                 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
                                148                 :                  errmsg("sample percentage must be between 0 and 100")));
                                149                 : 
                                150                 :     /*
                                151                 :      * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
                                152                 :      * store that as a uint64, of course.  Note that this gives strictly
                                153                 :      * correct behavior at the limits of zero or one probability.
                                154                 :      */
                                155              39 :     dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
                                156              39 :     sampler->cutoff = (uint64) dcutoff;
                                157              39 :     sampler->seed = seed;
                                158              39 :     sampler->lt = InvalidOffsetNumber;
                                159                 : 
                                160                 :     /*
                                161                 :      * Use bulkread, since we're scanning all pages.  But pagemode visibility
                                162                 :      * checking is a win only at larger sampling fractions.  The 25% cutoff
                                163                 :      * here is based on very limited experimentation.
                                164                 :      */
                                165              39 :     node->use_bulkread = true;
                                166              39 :     node->use_pagemode = (percent >= 25);
 2886 simon                     167              39 : }
                                168                 : 
                                169                 : /*
                                170                 :  * Select next sampled tuple in current block.
                                171                 :  *
                                172                 :  * It is OK here to return an offset without knowing if the tuple is visible
                                173                 :  * (or even exists).  The reason is that we do the coinflip for every tuple
                                174                 :  * offset in the table.  Since all tuples have the same probability of being
                                175                 :  * returned, it doesn't matter if we do extra coinflips for invisible tuples.
                                176                 :  *
                                177                 :  * When we reach end of the block, return InvalidOffsetNumber which tells
                                178                 :  * SampleScan to go to next block.
                                179                 :  */
                                180                 : static OffsetNumber
 2815 tgl                       181           64416 : bernoulli_nextsampletuple(SampleScanState *node,
                                182                 :                           BlockNumber blockno,
                                183                 :                           OffsetNumber maxoffset)
                                184                 : {
                                185           64416 :     BernoulliSamplerData *sampler = (BernoulliSamplerData *) node->tsm_state;
 2878 bruce                     186           64416 :     OffsetNumber tupoffset = sampler->lt;
                                187                 :     uint32      hashinput[3];
                                188                 : 
                                189                 :     /* Advance to first/next tuple in block */
 2886 simon                     190           64416 :     if (tupoffset == InvalidOffsetNumber)
                                191            4194 :         tupoffset = FirstOffsetNumber;
                                192                 :     else
                                193           60222 :         tupoffset++;
                                194                 : 
                                195                 :     /*
                                196                 :      * We compute the hash by applying hash_any to an array of 3 uint32's
                                197                 :      * containing the block, offset, and seed.  This is efficient to set up,
                                198                 :      * and with the current implementation of hash_any, it gives
                                199                 :      * machine-independent results, which is a nice property for regression
                                200                 :      * testing.
                                201                 :      *
                                202                 :      * These words in the hash input are the same throughout the block:
                                203                 :      */
 2815 tgl                       204           64416 :     hashinput[0] = blockno;
                                205           64416 :     hashinput[2] = sampler->seed;
                                206                 : 
                                207                 :     /*
                                208                 :      * Loop over tuple offsets until finding suitable TID or reaching end of
                                209                 :      * block.
                                210                 :      */
                                211          124518 :     for (; tupoffset <= maxoffset; tupoffset++)
                                212                 :     {
                                213                 :         uint32      hash;
                                214                 : 
                                215          120324 :         hashinput[1] = tupoffset;
                                216                 : 
                                217          120324 :         hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
                                218                 :                                        (int) sizeof(hashinput)));
                                219          120324 :         if (hash < sampler->cutoff)
 2886 simon                     220           60222 :             break;
                                221                 :     }
                                222                 : 
                                223           64416 :     if (tupoffset > maxoffset)
                                224            4194 :         tupoffset = InvalidOffsetNumber;
                                225                 : 
                                226           64416 :     sampler->lt = tupoffset;
                                227                 : 
 2815 tgl                       228           64416 :     return tupoffset;
                                229                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a