LCOV - differential code coverage report
Current view: top level - src/backend/access/tablesample - system.c (source / functions) Coverage Total Hit CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 100.0 % 65 65 65
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 6 6 6
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * system.c
       4                 :  *    support routines for SYSTEM tablesample method
       5                 :  *
       6                 :  * To ensure repeatability of samples, it is necessary that selection of a
       7                 :  * given tuple be history-independent; otherwise syncscanning would break
       8                 :  * repeatability, to say nothing of logically-irrelevant maintenance such
       9                 :  * as physical extension or shortening of the relation.
      10                 :  *
      11                 :  * To achieve that, we proceed by hashing each candidate block number together
      12                 :  * with the active seed, and then selecting it if the hash is less than the
      13                 :  * cutoff value computed from the selection probability by BeginSampleScan.
      14                 :  *
      15                 :  *
      16                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      17                 :  * Portions Copyright (c) 1994, Regents of the University of California
      18                 :  *
      19                 :  * IDENTIFICATION
      20                 :  *    src/backend/access/tablesample/system.c
      21                 :  *
      22                 :  *-------------------------------------------------------------------------
      23                 :  */
      24                 : 
      25                 : #include "postgres.h"
      26                 : 
      27                 : #include <math.h>
      28                 : 
      29                 : #include "access/relscan.h"
      30                 : #include "access/tsmapi.h"
      31                 : #include "catalog/pg_type.h"
      32                 : #include "common/hashfn.h"
      33                 : #include "optimizer/optimizer.h"
      34                 : #include "utils/builtins.h"
      35                 : 
      36                 : 
      37                 : /* Private state */
      38                 : typedef struct
      39                 : {
      40                 :     uint64      cutoff;         /* select blocks with hash less than this */
      41                 :     uint32      seed;           /* random seed */
      42                 :     BlockNumber nextblock;      /* next block to consider sampling */
      43                 :     OffsetNumber lt;            /* last tuple returned from current block */
      44                 : } SystemSamplerData;
      45                 : 
      46                 : 
      47                 : static void system_samplescangetsamplesize(PlannerInfo *root,
      48                 :                                            RelOptInfo *baserel,
      49                 :                                            List *paramexprs,
      50                 :                                            BlockNumber *pages,
      51                 :                                            double *tuples);
      52                 : static void system_initsamplescan(SampleScanState *node,
      53                 :                                   int eflags);
      54                 : static void system_beginsamplescan(SampleScanState *node,
      55                 :                                    Datum *params,
      56                 :                                    int nparams,
      57                 :                                    uint32 seed);
      58                 : static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
      59                 : static OffsetNumber system_nextsampletuple(SampleScanState *node,
      60                 :                                            BlockNumber blockno,
      61                 :                                            OffsetNumber maxoffset);
      62                 : 
      63                 : 
      64                 : /*
      65                 :  * Create a TsmRoutine descriptor for the SYSTEM method.
      66                 :  */
      67                 : Datum
      68 CBC         203 : tsm_system_handler(PG_FUNCTION_ARGS)
      69                 : {
      70             203 :     TsmRoutine *tsm = makeNode(TsmRoutine);
      71                 : 
      72             203 :     tsm->parameterTypes = list_make1_oid(FLOAT4OID);
      73             203 :     tsm->repeatable_across_queries = true;
      74             203 :     tsm->repeatable_across_scans = true;
      75             203 :     tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
      76             203 :     tsm->InitSampleScan = system_initsamplescan;
      77             203 :     tsm->BeginSampleScan = system_beginsamplescan;
      78             203 :     tsm->NextSampleBlock = system_nextsampleblock;
      79             203 :     tsm->NextSampleTuple = system_nextsampletuple;
      80             203 :     tsm->EndSampleScan = NULL;
      81                 : 
      82             203 :     PG_RETURN_POINTER(tsm);
      83                 : }
      84                 : 
      85                 : /*
      86                 :  * Sample size estimation.
      87                 :  */
      88                 : static void
      89              48 : system_samplescangetsamplesize(PlannerInfo *root,
      90                 :                                RelOptInfo *baserel,
      91                 :                                List *paramexprs,
      92                 :                                BlockNumber *pages,
      93                 :                                double *tuples)
      94                 : {
      95                 :     Node       *pctnode;
      96                 :     float4      samplefract;
      97                 : 
      98                 :     /* Try to extract an estimate for the sample percentage */
      99              48 :     pctnode = (Node *) linitial(paramexprs);
     100              48 :     pctnode = estimate_expression_value(root, pctnode);
     101                 : 
     102              48 :     if (IsA(pctnode, Const) &&
     103              42 :         !((Const *) pctnode)->constisnull)
     104                 :     {
     105              39 :         samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
     106              39 :         if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
     107              33 :             samplefract /= 100.0f;
     108                 :         else
     109                 :         {
     110                 :             /* Default samplefract if the value is bogus */
     111               6 :             samplefract = 0.1f;
     112                 :         }
     113                 :     }
     114                 :     else
     115                 :     {
     116                 :         /* Default samplefract if we didn't obtain a non-null Const */
     117               9 :         samplefract = 0.1f;
     118                 :     }
     119                 : 
     120                 :     /* We'll visit a sample of the pages ... */
     121              48 :     *pages = clamp_row_est(baserel->pages * samplefract);
     122                 : 
     123                 :     /* ... and hopefully get a representative number of tuples from them */
     124              48 :     *tuples = clamp_row_est(baserel->tuples * samplefract);
     125              48 : }
     126                 : 
     127                 : /*
     128                 :  * Initialize during executor setup.
     129                 :  */
     130                 : static void
     131              48 : system_initsamplescan(SampleScanState *node, int eflags)
     132                 : {
     133              48 :     node->tsm_state = palloc0(sizeof(SystemSamplerData));
     134              48 : }
     135                 : 
     136                 : /*
     137                 :  * Examine parameters and prepare for a sample scan.
     138                 :  */
     139                 : static void
     140              42 : system_beginsamplescan(SampleScanState *node,
     141                 :                        Datum *params,
     142                 :                        int nparams,
     143                 :                        uint32 seed)
     144                 : {
     145              42 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     146              42 :     double      percent = DatumGetFloat4(params[0]);
     147                 :     double      dcutoff;
     148                 : 
     149              42 :     if (percent < 0 || percent > 100 || isnan(percent))
     150               6 :         ereport(ERROR,
     151                 :                 (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
     152                 :                  errmsg("sample percentage must be between 0 and 100")));
     153                 : 
     154                 :     /*
     155                 :      * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
     156                 :      * store that as a uint64, of course.  Note that this gives strictly
     157                 :      * correct behavior at the limits of zero or one probability.
     158                 :      */
     159              36 :     dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
     160              36 :     sampler->cutoff = (uint64) dcutoff;
     161              36 :     sampler->seed = seed;
     162              36 :     sampler->nextblock = 0;
     163              36 :     sampler->lt = InvalidOffsetNumber;
     164                 : 
     165                 :     /*
     166                 :      * Bulkread buffer access strategy probably makes sense unless we're
     167                 :      * scanning a very small fraction of the table.  The 1% cutoff here is a
     168                 :      * guess.  We should use pagemode visibility checking, since we scan all
     169                 :      * tuples on each selected page.
     170                 :      */
     171              36 :     node->use_bulkread = (percent >= 1);
     172              36 :     node->use_pagemode = true;
     173              36 : }
     174                 : 
     175                 : /*
     176                 :  * Select next block to sample.
     177                 :  */
     178                 : static BlockNumber
     179            2163 : system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
     180                 : {
     181            2163 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     182            2163 :     BlockNumber nextblock = sampler->nextblock;
     183                 :     uint32      hashinput[2];
     184                 : 
     185                 :     /*
     186                 :      * We compute the hash by applying hash_any to an array of 2 uint32's
     187                 :      * containing the block number and seed.  This is efficient to set up, and
     188                 :      * with the current implementation of hash_any, it gives
     189                 :      * machine-independent results, which is a nice property for regression
     190                 :      * testing.
     191                 :      *
     192                 :      * These words in the hash input are the same throughout the block:
     193                 :      */
     194            2163 :     hashinput[1] = sampler->seed;
     195                 : 
     196                 :     /*
     197                 :      * Loop over block numbers until finding suitable block or reaching end of
     198                 :      * relation.
     199                 :      */
     200            4266 :     for (; nextblock < nblocks; nextblock++)
     201                 :     {
     202                 :         uint32      hash;
     203                 : 
     204            4233 :         hashinput[0] = nextblock;
     205                 : 
     206            4233 :         hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
     207                 :                                        (int) sizeof(hashinput)));
     208            4233 :         if (hash < sampler->cutoff)
     209            2130 :             break;
     210                 :     }
     211                 : 
     212            2163 :     if (nextblock < nblocks)
     213                 :     {
     214                 :         /* Found a suitable block; remember where we should start next time */
     215            2130 :         sampler->nextblock = nextblock + 1;
     216            2130 :         return nextblock;
     217                 :     }
     218                 : 
     219                 :     /* Done, but let's reset nextblock to 0 for safety. */
     220              33 :     sampler->nextblock = 0;
     221              33 :     return InvalidBlockNumber;
     222                 : }
     223                 : 
     224                 : /*
     225                 :  * Select next sampled tuple in current block.
     226                 :  *
     227                 :  * In block sampling, we just want to sample all the tuples in each selected
     228                 :  * block.
     229                 :  *
     230                 :  * It is OK here to return an offset without knowing if the tuple is visible
     231                 :  * (or even exists); nodeSamplescan.c will deal with that.
     232                 :  *
     233                 :  * When we reach end of the block, return InvalidOffsetNumber which tells
     234                 :  * SampleScan to go to next block.
     235                 :  */
     236                 : static OffsetNumber
     237           62289 : system_nextsampletuple(SampleScanState *node,
     238                 :                        BlockNumber blockno,
     239                 :                        OffsetNumber maxoffset)
     240                 : {
     241           62289 :     SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
     242           62289 :     OffsetNumber tupoffset = sampler->lt;
     243                 : 
     244                 :     /* Advance to next possible offset on page */
     245           62289 :     if (tupoffset == InvalidOffsetNumber)
     246            2130 :         tupoffset = FirstOffsetNumber;
     247                 :     else
     248           60159 :         tupoffset++;
     249                 : 
     250                 :     /* Done? */
     251           62289 :     if (tupoffset > maxoffset)
     252            2127 :         tupoffset = InvalidOffsetNumber;
     253                 : 
     254           62289 :     sampler->lt = tupoffset;
     255                 : 
     256           62289 :     return tupoffset;
     257                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a