Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * system.c
4 : * support routines for SYSTEM tablesample method
5 : *
6 : * To ensure repeatability of samples, it is necessary that selection of a
7 : * given tuple be history-independent; otherwise syncscanning would break
8 : * repeatability, to say nothing of logically-irrelevant maintenance such
9 : * as physical extension or shortening of the relation.
10 : *
11 : * To achieve that, we proceed by hashing each candidate block number together
12 : * with the active seed, and then selecting it if the hash is less than the
13 : * cutoff value computed from the selection probability by BeginSampleScan.
14 : *
15 : *
16 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
17 : * Portions Copyright (c) 1994, Regents of the University of California
18 : *
19 : * IDENTIFICATION
20 : * src/backend/access/tablesample/system.c
21 : *
22 : *-------------------------------------------------------------------------
23 : */
24 :
25 : #include "postgres.h"
26 :
27 : #include <math.h>
28 :
29 : #include "access/relscan.h"
30 : #include "access/tsmapi.h"
31 : #include "catalog/pg_type.h"
32 : #include "common/hashfn.h"
33 : #include "optimizer/optimizer.h"
34 : #include "utils/builtins.h"
35 :
36 :
37 : /* Private state */
38 : typedef struct
39 : {
40 : uint64 cutoff; /* select blocks with hash less than this */
41 : uint32 seed; /* random seed */
42 : BlockNumber nextblock; /* next block to consider sampling */
43 : OffsetNumber lt; /* last tuple returned from current block */
44 : } SystemSamplerData;
45 :
46 :
47 : static void system_samplescangetsamplesize(PlannerInfo *root,
48 : RelOptInfo *baserel,
49 : List *paramexprs,
50 : BlockNumber *pages,
51 : double *tuples);
52 : static void system_initsamplescan(SampleScanState *node,
53 : int eflags);
54 : static void system_beginsamplescan(SampleScanState *node,
55 : Datum *params,
56 : int nparams,
57 : uint32 seed);
58 : static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
59 : static OffsetNumber system_nextsampletuple(SampleScanState *node,
60 : BlockNumber blockno,
61 : OffsetNumber maxoffset);
62 :
63 :
64 : /*
65 : * Create a TsmRoutine descriptor for the SYSTEM method.
66 : */
67 : Datum
2815 tgl 68 CBC 203 : tsm_system_handler(PG_FUNCTION_ARGS)
69 : {
70 203 : TsmRoutine *tsm = makeNode(TsmRoutine);
71 :
72 203 : tsm->parameterTypes = list_make1_oid(FLOAT4OID);
73 203 : tsm->repeatable_across_queries = true;
74 203 : tsm->repeatable_across_scans = true;
75 203 : tsm->SampleScanGetSampleSize = system_samplescangetsamplesize;
76 203 : tsm->InitSampleScan = system_initsamplescan;
77 203 : tsm->BeginSampleScan = system_beginsamplescan;
78 203 : tsm->NextSampleBlock = system_nextsampleblock;
79 203 : tsm->NextSampleTuple = system_nextsampletuple;
80 203 : tsm->EndSampleScan = NULL;
81 :
82 203 : PG_RETURN_POINTER(tsm);
83 : }
84 :
85 : /*
86 : * Sample size estimation.
87 : */
88 : static void
89 48 : system_samplescangetsamplesize(PlannerInfo *root,
90 : RelOptInfo *baserel,
91 : List *paramexprs,
92 : BlockNumber *pages,
93 : double *tuples)
94 : {
95 : Node *pctnode;
96 : float4 samplefract;
97 :
98 : /* Try to extract an estimate for the sample percentage */
99 48 : pctnode = (Node *) linitial(paramexprs);
100 48 : pctnode = estimate_expression_value(root, pctnode);
101 :
102 48 : if (IsA(pctnode, Const) &&
103 42 : !((Const *) pctnode)->constisnull)
104 : {
105 39 : samplefract = DatumGetFloat4(((Const *) pctnode)->constvalue);
106 39 : if (samplefract >= 0 && samplefract <= 100 && !isnan(samplefract))
107 33 : samplefract /= 100.0f;
108 : else
109 : {
110 : /* Default samplefract if the value is bogus */
111 6 : samplefract = 0.1f;
112 : }
113 : }
114 : else
115 : {
116 : /* Default samplefract if we didn't obtain a non-null Const */
117 9 : samplefract = 0.1f;
118 : }
119 :
120 : /* We'll visit a sample of the pages ... */
121 48 : *pages = clamp_row_est(baserel->pages * samplefract);
122 :
123 : /* ... and hopefully get a representative number of tuples from them */
124 48 : *tuples = clamp_row_est(baserel->tuples * samplefract);
2886 simon 125 48 : }
126 :
127 : /*
128 : * Initialize during executor setup.
129 : */
130 : static void
2815 tgl 131 48 : system_initsamplescan(SampleScanState *node, int eflags)
132 : {
133 48 : node->tsm_state = palloc0(sizeof(SystemSamplerData));
2886 simon 134 48 : }
135 :
136 : /*
137 : * Examine parameters and prepare for a sample scan.
138 : */
139 : static void
2815 tgl 140 42 : system_beginsamplescan(SampleScanState *node,
141 : Datum *params,
142 : int nparams,
143 : uint32 seed)
144 : {
145 42 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
146 42 : double percent = DatumGetFloat4(params[0]);
147 : double dcutoff;
148 :
149 42 : if (percent < 0 || percent > 100 || isnan(percent))
150 6 : ereport(ERROR,
151 : (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
152 : errmsg("sample percentage must be between 0 and 100")));
153 :
154 : /*
155 : * The cutoff is sample probability times (PG_UINT32_MAX + 1); we have to
156 : * store that as a uint64, of course. Note that this gives strictly
157 : * correct behavior at the limits of zero or one probability.
158 : */
159 36 : dcutoff = rint(((double) PG_UINT32_MAX + 1) * percent / 100);
160 36 : sampler->cutoff = (uint64) dcutoff;
161 36 : sampler->seed = seed;
162 36 : sampler->nextblock = 0;
2886 simon 163 36 : sampler->lt = InvalidOffsetNumber;
164 :
165 : /*
166 : * Bulkread buffer access strategy probably makes sense unless we're
167 : * scanning a very small fraction of the table. The 1% cutoff here is a
168 : * guess. We should use pagemode visibility checking, since we scan all
169 : * tuples on each selected page.
170 : */
2815 tgl 171 36 : node->use_bulkread = (percent >= 1);
172 36 : node->use_pagemode = true;
2886 simon 173 36 : }
174 :
175 : /*
176 : * Select next block to sample.
177 : */
178 : static BlockNumber
1471 andres 179 2163 : system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
180 : {
2815 tgl 181 2163 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
182 2163 : BlockNumber nextblock = sampler->nextblock;
183 : uint32 hashinput[2];
184 :
185 : /*
186 : * We compute the hash by applying hash_any to an array of 2 uint32's
187 : * containing the block number and seed. This is efficient to set up, and
188 : * with the current implementation of hash_any, it gives
189 : * machine-independent results, which is a nice property for regression
190 : * testing.
191 : *
192 : * These words in the hash input are the same throughout the block:
193 : */
194 2163 : hashinput[1] = sampler->seed;
195 :
196 : /*
197 : * Loop over block numbers until finding suitable block or reaching end of
198 : * relation.
199 : */
1471 andres 200 4266 : for (; nextblock < nblocks; nextblock++)
201 : {
202 : uint32 hash;
203 :
2815 tgl 204 4233 : hashinput[0] = nextblock;
205 :
206 4233 : hash = DatumGetUInt32(hash_any((const unsigned char *) hashinput,
207 : (int) sizeof(hashinput)));
208 4233 : if (hash < sampler->cutoff)
209 2130 : break;
210 : }
211 :
1471 andres 212 2163 : if (nextblock < nblocks)
213 : {
214 : /* Found a suitable block; remember where we should start next time */
2815 tgl 215 2130 : sampler->nextblock = nextblock + 1;
216 2130 : return nextblock;
217 : }
218 :
219 : /* Done, but let's reset nextblock to 0 for safety. */
220 33 : sampler->nextblock = 0;
221 33 : return InvalidBlockNumber;
222 : }
223 :
224 : /*
225 : * Select next sampled tuple in current block.
226 : *
227 : * In block sampling, we just want to sample all the tuples in each selected
228 : * block.
229 : *
230 : * It is OK here to return an offset without knowing if the tuple is visible
231 : * (or even exists); nodeSamplescan.c will deal with that.
232 : *
233 : * When we reach end of the block, return InvalidOffsetNumber which tells
234 : * SampleScan to go to next block.
235 : */
236 : static OffsetNumber
237 62289 : system_nextsampletuple(SampleScanState *node,
238 : BlockNumber blockno,
239 : OffsetNumber maxoffset)
240 : {
241 62289 : SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
242 62289 : OffsetNumber tupoffset = sampler->lt;
243 :
244 : /* Advance to next possible offset on page */
245 62289 : if (tupoffset == InvalidOffsetNumber)
246 2130 : tupoffset = FirstOffsetNumber;
247 : else
248 60159 : tupoffset++;
249 :
250 : /* Done? */
251 62289 : if (tupoffset > maxoffset)
252 2127 : tupoffset = InvalidOffsetNumber;
253 :
254 62289 : sampler->lt = tupoffset;
255 :
256 62289 : return tupoffset;
257 : }
|