Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeSamplescan.c
4 : * Support routines for sample scans of relations (table sampling).
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeSamplescan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/relscan.h"
18 : #include "access/tableam.h"
19 : #include "access/tsmapi.h"
20 : #include "common/pg_prng.h"
21 : #include "executor/executor.h"
22 : #include "executor/nodeSamplescan.h"
23 : #include "miscadmin.h"
24 : #include "pgstat.h"
25 : #include "storage/bufmgr.h"
26 : #include "storage/predicate.h"
27 : #include "utils/builtins.h"
28 : #include "utils/rel.h"
29 :
30 : static TupleTableSlot *SampleNext(SampleScanState *node);
31 : static void tablesample_init(SampleScanState *scanstate);
32 : static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
33 :
34 : /* ----------------------------------------------------------------
35 : * Scan Support
36 : * ----------------------------------------------------------------
37 : */
38 :
39 : /* ----------------------------------------------------------------
40 : * SampleNext
41 : *
42 : * This is a workhorse for ExecSampleScan
43 : * ----------------------------------------------------------------
44 : */
45 : static TupleTableSlot *
2886 simon 46 CBC 120685 : SampleNext(SampleScanState *node)
47 : {
48 : /*
49 : * if this is first call within a scan, initialize
50 : */
2815 tgl 51 120685 : if (!node->begun)
52 108 : tablesample_init(node);
53 :
54 : /*
55 : * get the next tuple, and store it in our result slot
56 : */
1471 andres 57 120665 : return tablesample_getnext(node);
58 : }
59 :
60 : /*
61 : * SampleRecheck -- access method routine to recheck a tuple in EvalPlanQual
62 : */
63 : static bool
2886 simon 64 UBC 0 : SampleRecheck(SampleScanState *node, TupleTableSlot *slot)
65 : {
66 : /*
67 : * No need to recheck for SampleScan, since like SeqScan we don't pass any
68 : * checkable keys to heap_beginscan.
69 : */
70 0 : return true;
71 : }
72 :
73 : /* ----------------------------------------------------------------
74 : * ExecSampleScan(node)
75 : *
76 : * Scans the relation using the sampling method and returns
77 : * the next qualifying tuple.
78 : * We call the ExecScan() routine and pass it the appropriate
79 : * access method functions.
80 : * ----------------------------------------------------------------
81 : */
82 : static TupleTableSlot *
2092 andres 83 CBC 120682 : ExecSampleScan(PlanState *pstate)
84 : {
85 120682 : SampleScanState *node = castNode(SampleScanState, pstate);
86 :
87 120682 : return ExecScan(&node->ss,
88 : (ExecScanAccessMtd) SampleNext,
89 : (ExecScanRecheckMtd) SampleRecheck);
90 : }
91 :
92 : /* ----------------------------------------------------------------
93 : * ExecInitSampleScan
94 : * ----------------------------------------------------------------
95 : */
96 : SampleScanState *
2886 simon 97 126 : ExecInitSampleScan(SampleScan *node, EState *estate, int eflags)
98 : {
99 : SampleScanState *scanstate;
2815 tgl 100 126 : TableSampleClause *tsc = node->tablesample;
101 : TsmRoutine *tsm;
102 :
2886 simon 103 126 : Assert(outerPlan(node) == NULL);
104 126 : Assert(innerPlan(node) == NULL);
105 :
106 : /*
107 : * create state structure
108 : */
109 126 : scanstate = makeNode(SampleScanState);
110 126 : scanstate->ss.ps.plan = (Plan *) node;
111 126 : scanstate->ss.ps.state = estate;
2092 andres 112 126 : scanstate->ss.ps.ExecProcNode = ExecSampleScan;
113 :
114 : /*
115 : * Miscellaneous initialization
116 : *
117 : * create expression context for node
118 : */
2886 simon 119 126 : ExecAssignExprContext(estate, &scanstate->ss.ps);
120 :
121 : /*
122 : * open the scan relation
123 : */
1878 andres 124 126 : scanstate->ss.ss_currentRelation =
125 126 : ExecOpenScanRelation(estate,
126 : node->scan.scanrelid,
127 : eflags);
128 :
129 : /* we won't set up the HeapScanDesc till later */
130 126 : scanstate->ss.ss_currentScanDesc = NULL;
131 :
132 : /* and create slot with appropriate rowtype */
133 126 : ExecInitScanTupleSlot(estate, &scanstate->ss,
1606 134 126 : RelationGetDescr(scanstate->ss.ss_currentRelation),
135 : table_slot_callbacks(scanstate->ss.ss_currentRelation));
136 :
137 : /*
138 : * Initialize result type and projection.
139 : */
1612 140 126 : ExecInitResultTypeTL(&scanstate->ss.ps);
1878 141 126 : ExecAssignScanProjectionInfo(&scanstate->ss);
142 :
143 : /*
144 : * initialize child expressions
145 : */
146 126 : scanstate->ss.ps.qual =
147 126 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
148 :
149 126 : scanstate->args = ExecInitExprList(tsc->args, (PlanState *) scanstate);
150 126 : scanstate->repeatable =
151 126 : ExecInitExpr(tsc->repeatable, (PlanState *) scanstate);
152 :
153 : /*
154 : * If we don't have a REPEATABLE clause, select a random seed. We want to
155 : * do this just once, since the seed shouldn't change over rescans.
156 : */
2815 tgl 157 126 : if (tsc->repeatable == NULL)
497 158 84 : scanstate->seed = pg_prng_uint32(&pg_global_prng_state);
159 :
160 : /*
161 : * Finally, initialize the TABLESAMPLE method handler.
162 : */
2815 163 126 : tsm = GetTsmRoutine(tsc->tsmhandler);
164 126 : scanstate->tsmroutine = tsm;
165 126 : scanstate->tsm_state = NULL;
166 :
167 126 : if (tsm->InitSampleScan)
168 126 : tsm->InitSampleScan(scanstate, eflags);
169 :
170 : /* We'll do BeginSampleScan later; we can't evaluate params yet */
171 126 : scanstate->begun = false;
172 :
2886 simon 173 126 : return scanstate;
174 : }
175 :
176 : /* ----------------------------------------------------------------
177 : * ExecEndSampleScan
178 : *
179 : * frees any storage allocated through C routines.
180 : * ----------------------------------------------------------------
181 : */
182 : void
183 106 : ExecEndSampleScan(SampleScanState *node)
184 : {
185 : /*
186 : * Tell sampling function that we finished the scan.
187 : */
2815 tgl 188 106 : if (node->tsmroutine->EndSampleScan)
2815 tgl 189 UBC 0 : node->tsmroutine->EndSampleScan(node);
190 :
191 : /*
192 : * Free the exprcontext
193 : */
2886 simon 194 CBC 106 : ExecFreeExprContext(&node->ss.ps);
195 :
196 : /*
197 : * clean out the tuple table
198 : */
1612 andres 199 106 : if (node->ss.ps.ps_ResultTupleSlot)
200 57 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
2886 simon 201 106 : ExecClearTuple(node->ss.ss_ScanTupleSlot);
202 :
203 : /*
204 : * close heap scan
205 : */
2815 tgl 206 106 : if (node->ss.ss_currentScanDesc)
1490 andres 207 73 : table_endscan(node->ss.ss_currentScanDesc);
2886 simon 208 106 : }
209 :
210 : /* ----------------------------------------------------------------
211 : * ExecReScanSampleScan
212 : *
213 : * Rescans the relation.
214 : *
215 : * ----------------------------------------------------------------
216 : */
217 : void
218 29 : ExecReScanSampleScan(SampleScanState *node)
219 : {
220 : /* Remember we need to do BeginSampleScan again (if we did it at all) */
2815 tgl 221 29 : node->begun = false;
1471 andres 222 29 : node->done = false;
223 29 : node->haveblock = false;
224 29 : node->donetuples = 0;
225 :
2815 tgl 226 29 : ExecScanReScan(&node->ss);
227 29 : }
228 :
229 :
230 : /*
231 : * Initialize the TABLESAMPLE method: evaluate params and call BeginSampleScan.
232 : */
233 : static void
234 108 : tablesample_init(SampleScanState *scanstate)
235 : {
236 108 : TsmRoutine *tsm = scanstate->tsmroutine;
237 108 : ExprContext *econtext = scanstate->ss.ps.ps_ExprContext;
238 : Datum *params;
239 : Datum datum;
240 : bool isnull;
241 : uint32 seed;
242 : bool allow_sync;
243 : int i;
244 : ListCell *arg;
245 :
1471 andres 246 108 : scanstate->donetuples = 0;
2815 tgl 247 108 : params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
248 :
249 108 : i = 0;
250 213 : foreach(arg, scanstate->args)
251 : {
252 108 : ExprState *argstate = (ExprState *) lfirst(arg);
253 :
254 108 : params[i] = ExecEvalExprSwitchContext(argstate,
255 : econtext,
256 : &isnull);
257 108 : if (isnull)
258 3 : ereport(ERROR,
259 : (errcode(ERRCODE_INVALID_TABLESAMPLE_ARGUMENT),
260 : errmsg("TABLESAMPLE parameter cannot be null")));
261 105 : i++;
262 : }
263 :
264 105 : if (scanstate->repeatable)
265 : {
266 36 : datum = ExecEvalExprSwitchContext(scanstate->repeatable,
267 : econtext,
268 : &isnull);
269 36 : if (isnull)
270 3 : ereport(ERROR,
271 : (errcode(ERRCODE_INVALID_TABLESAMPLE_REPEAT),
272 : errmsg("TABLESAMPLE REPEATABLE parameter cannot be null")));
273 :
274 : /*
275 : * The REPEATABLE parameter has been coerced to float8 by the parser.
276 : * The reason for using float8 at the SQL level is that it will
277 : * produce unsurprising results both for users used to databases that
278 : * accept only integers in the REPEATABLE clause and for those who
279 : * might expect that REPEATABLE works like setseed() (a float in the
280 : * range from -1 to 1).
281 : *
282 : * We use hashfloat8() to convert the supplied value into a suitable
283 : * seed. For regression-testing purposes, that has the convenient
284 : * property that REPEATABLE(0) gives a machine-independent result.
285 : */
286 33 : seed = DatumGetUInt32(DirectFunctionCall1(hashfloat8, datum));
287 : }
288 : else
289 : {
290 : /* Use the seed selected by ExecInitSampleScan */
291 69 : seed = scanstate->seed;
292 : }
293 :
294 : /* Set default values for params that BeginSampleScan can adjust */
295 102 : scanstate->use_bulkread = true;
296 102 : scanstate->use_pagemode = true;
297 :
298 : /* Let tablesample method do its thing */
299 102 : tsm->BeginSampleScan(scanstate,
300 : params,
301 102 : list_length(scanstate->args),
302 : seed);
303 :
304 : /* We'll use syncscan if there's no NextSampleBlock function */
305 88 : allow_sync = (tsm->NextSampleBlock == NULL);
306 :
307 : /* Now we can create or reset the HeapScanDesc */
308 88 : if (scanstate->ss.ss_currentScanDesc == NULL)
309 : {
310 73 : scanstate->ss.ss_currentScanDesc =
1490 andres 311 73 : table_beginscan_sampling(scanstate->ss.ss_currentRelation,
312 73 : scanstate->ss.ps.state->es_snapshot,
313 : 0, NULL,
314 73 : scanstate->use_bulkread,
315 : allow_sync,
316 73 : scanstate->use_pagemode);
317 : }
318 : else
319 : {
320 15 : table_rescan_set_params(scanstate->ss.ss_currentScanDesc, NULL,
321 15 : scanstate->use_bulkread,
322 : allow_sync,
323 15 : scanstate->use_pagemode);
324 : }
325 :
2815 tgl 326 88 : pfree(params);
327 :
328 : /* And we're initialized. */
329 88 : scanstate->begun = true;
330 88 : }
331 :
332 : /*
333 : * Get next tuple from TABLESAMPLE method.
334 : */
335 : static TupleTableSlot *
336 120665 : tablesample_getnext(SampleScanState *scanstate)
337 : {
1490 andres 338 120665 : TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
1471 339 120665 : TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
340 :
341 120665 : ExecClearTuple(slot);
342 :
343 120665 : if (scanstate->done)
1471 andres 344 UBC 0 : return NULL;
345 :
346 : for (;;)
347 : {
1471 andres 348 CBC 127032 : if (!scanstate->haveblock)
349 : {
350 6455 : if (!table_scan_sample_next_block(scan, scanstate))
351 : {
352 85 : scanstate->haveblock = false;
353 85 : scanstate->done = true;
354 :
355 : /* exhausted relation */
356 85 : return NULL;
357 : }
358 :
359 6370 : scanstate->haveblock = true;
360 : }
361 :
362 126947 : if (!table_scan_sample_next_tuple(scan, scanstate, slot))
363 : {
364 : /*
365 : * If we get here, it means we've exhausted the items on this page
366 : * and it's time to move to the next.
367 : */
368 6367 : scanstate->haveblock = false;
369 6367 : continue;
370 : }
371 :
372 : /* Found visible tuple, return it. */
373 120580 : break;
374 : }
375 :
376 120580 : scanstate->donetuples++;
377 :
378 120580 : return slot;
379 : }
|