TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeTidscan.c
4 : * Routines to support direct tid scans of relations
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/executor/nodeTidscan.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : /*
16 : * INTERFACE ROUTINES
17 : *
18 : * ExecTidScan scans a relation using tids
19 : * ExecInitTidScan creates and initializes state info.
20 : * ExecReScanTidScan rescans the tid relation.
21 : * ExecEndTidScan releases all storage.
22 : */
23 : #include "postgres.h"
24 :
25 : #include "access/sysattr.h"
26 : #include "access/tableam.h"
27 : #include "catalog/pg_type.h"
28 : #include "executor/execdebug.h"
29 : #include "executor/nodeTidscan.h"
30 : #include "lib/qunique.h"
31 : #include "miscadmin.h"
32 : #include "nodes/nodeFuncs.h"
33 : #include "storage/bufmgr.h"
34 : #include "utils/array.h"
35 : #include "utils/rel.h"
36 :
37 :
38 : /*
39 : * It's sufficient to check varattno to identify the CTID variable, as any
40 : * Var in the relation scan qual must be for our table. (Even if it's a
41 : * parameterized scan referencing some other table's CTID, the other table's
42 : * Var would have become a Param by the time it gets here.)
43 : */
44 : #define IsCTIDVar(node) \
45 : ((node) != NULL && \
46 : IsA((node), Var) && \
47 : ((Var *) (node))->varattno == SelfItemPointerAttributeNumber)
48 :
49 : /* one element in tss_tidexprs */
50 : typedef struct TidExpr
51 : {
52 : ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
53 : bool isarray; /* if true, it yields tid[] not just tid */
54 : CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */
55 : } TidExpr;
56 :
57 : static void TidExprListCreate(TidScanState *tidstate);
58 : static void TidListEval(TidScanState *tidstate);
59 : static int itemptr_comparator(const void *a, const void *b);
60 : static TupleTableSlot *TidNext(TidScanState *node);
61 :
62 :
63 : /*
64 : * Extract the qual subexpressions that yield TIDs to search for,
65 : * and compile them into ExprStates if they're ordinary expressions.
66 : *
67 : * CURRENT OF is a special case that we can't compile usefully;
68 : * just drop it into the TidExpr list as-is.
69 : */
70 : static void
71 GIC 359 : TidExprListCreate(TidScanState *tidstate)
72 : {
73 359 : TidScan *node = (TidScan *) tidstate->ss.ps.plan;
74 : ListCell *l;
75 :
76 CBC 359 : tidstate->tss_tidexprs = NIL;
77 GIC 359 : tidstate->tss_isCurrentOf = false;
78 ECB :
79 GIC 730 : foreach(l, node->tidquals)
80 : {
81 CBC 371 : Expr *expr = (Expr *) lfirst(l);
82 371 : TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
83 :
84 371 : if (is_opclause(expr))
85 : {
86 ECB : Node *arg1;
87 : Node *arg2;
88 :
89 CBC 136 : arg1 = get_leftop(expr);
90 GIC 136 : arg2 = get_rightop(expr);
91 136 : if (IsCTIDVar(arg1))
92 112 : tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
93 : &tidstate->ss.ps);
94 CBC 24 : else if (IsCTIDVar(arg2))
95 24 : tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
96 ECB : &tidstate->ss.ps);
97 : else
98 UIC 0 : elog(ERROR, "could not identify CTID variable");
99 CBC 136 : tidexpr->isarray = false;
100 ECB : }
101 GIC 235 : else if (expr && IsA(expr, ScalarArrayOpExpr))
102 15 : {
103 GBC 15 : ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr;
104 ECB :
105 GIC 15 : Assert(IsCTIDVar(linitial(saex->args)));
106 CBC 15 : tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
107 ECB : &tidstate->ss.ps);
108 CBC 15 : tidexpr->isarray = true;
109 : }
110 220 : else if (expr && IsA(expr, CurrentOfExpr))
111 220 : {
112 GIC 220 : CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
113 ECB :
114 GIC 220 : tidexpr->cexpr = cexpr;
115 CBC 220 : tidstate->tss_isCurrentOf = true;
116 ECB : }
117 : else
118 UIC 0 : elog(ERROR, "could not identify CTID expression");
119 ECB :
120 CBC 371 : tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
121 : }
122 :
123 EUB : /* CurrentOfExpr could never appear OR'd with something else */
124 GIC 359 : Assert(list_length(tidstate->tss_tidexprs) == 1 ||
125 ECB : !tidstate->tss_isCurrentOf);
126 GIC 359 : }
127 :
128 : /*
129 ECB : * Compute the list of TIDs to be visited, by evaluating the expressions
130 : * for them.
131 : *
132 : * (The result is actually an array, not a list.)
133 : */
134 : static void
135 GIC 314 : TidListEval(TidScanState *tidstate)
136 : {
137 314 : ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
138 : TableScanDesc scan;
139 : ItemPointerData *tidList;
140 ECB : int numAllocTids;
141 : int numTids;
142 : ListCell *l;
143 :
144 : /*
145 : * Start scan on-demand - initializing a scan isn't free (e.g. heap stats
146 : * the size of the table), so it makes sense to delay that until needed -
147 : * the node might never get executed.
148 : */
149 GIC 314 : if (tidstate->ss.ss_currentScanDesc == NULL)
150 311 : tidstate->ss.ss_currentScanDesc =
151 311 : table_beginscan_tid(tidstate->ss.ss_currentRelation,
152 311 : tidstate->ss.ps.state->es_snapshot);
153 314 : scan = tidstate->ss.ss_currentScanDesc;
154 ECB :
155 : /*
156 : * We initialize the array with enough slots for the case that all quals
157 : * are simple OpExprs or CurrentOfExprs. If there are any
158 : * ScalarArrayOpExprs, we may have to enlarge the array.
159 : */
160 GIC 314 : numAllocTids = list_length(tidstate->tss_tidexprs);
161 : tidList = (ItemPointerData *)
162 314 : palloc(numAllocTids * sizeof(ItemPointerData));
163 314 : numTids = 0;
164 :
165 CBC 604 : foreach(l, tidstate->tss_tidexprs)
166 : {
167 320 : TidExpr *tidexpr = (TidExpr *) lfirst(l);
168 ECB : ItemPointer itemptr;
169 : bool isNull;
170 :
171 GIC 320 : if (tidexpr->exprstate && !tidexpr->isarray)
172 ECB : {
173 : itemptr = (ItemPointer)
174 GIC 115 : DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
175 : econtext,
176 ECB : &isNull));
177 GIC 115 : if (isNull)
178 UIC 0 : continue;
179 ECB :
180 : /*
181 : * We silently discard any TIDs that the AM considers invalid
182 : * (E.g. for heap, they could be out of range at the time of scan
183 EUB : * start. Since we hold at least AccessShareLock on the table, it
184 : * won't be possible for someone to truncate away the blocks we
185 : * intend to visit.).
186 : */
187 GIC 115 : if (!table_tuple_tid_valid(scan, itemptr))
188 UIC 0 : continue;
189 :
190 GIC 115 : if (numTids >= numAllocTids)
191 : {
192 CBC 3 : numAllocTids *= 2;
193 EUB : tidList = (ItemPointerData *)
194 GIC 3 : repalloc(tidList,
195 ECB : numAllocTids * sizeof(ItemPointerData));
196 : }
197 CBC 115 : tidList[numTids++] = *itemptr;
198 : }
199 205 : else if (tidexpr->exprstate && tidexpr->isarray)
200 GIC 12 : {
201 : Datum arraydatum;
202 ECB : ArrayType *itemarray;
203 : Datum *ipdatums;
204 : bool *ipnulls;
205 : int ndatums;
206 : int i;
207 :
208 GIC 12 : arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
209 : econtext,
210 : &isNull);
211 12 : if (isNull)
212 UIC 0 : continue;
213 CBC 12 : itemarray = DatumGetArrayTypeP(arraydatum);
214 GNC 12 : deconstruct_array_builtin(itemarray, TIDOID, &ipdatums, &ipnulls, &ndatums);
215 GBC 12 : if (numTids + ndatums > numAllocTids)
216 ECB : {
217 CBC 9 : numAllocTids = numTids + ndatums;
218 ECB : tidList = (ItemPointerData *)
219 GIC 9 : repalloc(tidList,
220 ECB : numAllocTids * sizeof(ItemPointerData));
221 : }
222 CBC 36 : for (i = 0; i < ndatums; i++)
223 : {
224 GIC 24 : if (ipnulls[i])
225 LBC 0 : continue;
226 :
227 CBC 24 : itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
228 EUB :
229 GIC 24 : if (!table_tuple_tid_valid(scan, itemptr))
230 LBC 0 : continue;
231 :
232 CBC 24 : tidList[numTids++] = *itemptr;
233 EUB : }
234 GIC 12 : pfree(ipdatums);
235 CBC 12 : pfree(ipnulls);
236 : }
237 ECB : else
238 : {
239 : ItemPointerData cursor_tid;
240 :
241 GIC 193 : Assert(tidexpr->cexpr);
242 163 : if (execCurrentOf(tidexpr->cexpr, econtext,
243 193 : RelationGetRelid(tidstate->ss.ss_currentRelation),
244 ECB : &cursor_tid))
245 : {
246 CBC 138 : if (numTids >= numAllocTids)
247 : {
248 UIC 0 : numAllocTids *= 2;
249 ECB : tidList = (ItemPointerData *)
250 UIC 0 : repalloc(tidList,
251 EUB : numAllocTids * sizeof(ItemPointerData));
252 : }
253 GBC 138 : tidList[numTids++] = cursor_tid;
254 : }
255 : }
256 ECB : }
257 :
258 : /*
259 : * Sort the array of TIDs into order, and eliminate duplicates.
260 : * Eliminating duplicates is necessary since we want OR semantics across
261 : * the list. Sorting makes it easier to detect duplicates, and as a bonus
262 : * ensures that we will visit the heap in the most efficient way.
263 : */
264 GIC 284 : if (numTids > 1)
265 : {
266 : /* CurrentOfExpr could never appear OR'd with something else */
267 CBC 15 : Assert(!tidstate->tss_isCurrentOf);
268 :
269 GNC 15 : qsort(tidList, numTids, sizeof(ItemPointerData),
270 ECB : itemptr_comparator);
271 GIC 15 : numTids = qunique(tidList, numTids, sizeof(ItemPointerData),
272 ECB : itemptr_comparator);
273 : }
274 :
275 GIC 284 : tidstate->tss_TidList = tidList;
276 284 : tidstate->tss_NumTids = numTids;
277 284 : tidstate->tss_TidPtr = -1;
278 CBC 284 : }
279 ECB :
280 : /*
281 : * qsort comparator for ItemPointerData items
282 : */
283 : static int
284 GIC 39 : itemptr_comparator(const void *a, const void *b)
285 : {
286 39 : const ItemPointerData *ipa = (const ItemPointerData *) a;
287 CBC 39 : const ItemPointerData *ipb = (const ItemPointerData *) b;
288 GIC 39 : BlockNumber ba = ItemPointerGetBlockNumber(ipa);
289 CBC 39 : BlockNumber bb = ItemPointerGetBlockNumber(ipb);
290 39 : OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
291 39 : OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
292 ECB :
293 CBC 39 : if (ba < bb)
294 LBC 0 : return -1;
295 GIC 39 : if (ba > bb)
296 LBC 0 : return 1;
297 GBC 39 : if (oa < ob)
298 CBC 12 : return -1;
299 GBC 27 : if (oa > ob)
300 CBC 27 : return 1;
301 LBC 0 : return 0;
302 ECB : }
303 :
304 EUB : /* ----------------------------------------------------------------
305 : * TidNext
306 : *
307 : * Retrieve a tuple from the TidScan node's currentRelation
308 : * using the tids in the TidScanState information.
309 : *
310 : * ----------------------------------------------------------------
311 : */
312 : static TupleTableSlot *
313 GIC 573 : TidNext(TidScanState *node)
314 : {
315 : EState *estate;
316 ECB : ScanDirection direction;
317 : Snapshot snapshot;
318 : TableScanDesc scan;
319 : Relation heapRelation;
320 : TupleTableSlot *slot;
321 : ItemPointerData *tidList;
322 : int numTids;
323 : bool bBackward;
324 :
325 : /*
326 : * extract necessary information from tid scan node
327 : */
328 GIC 573 : estate = node->ss.ps.state;
329 573 : direction = estate->es_direction;
330 573 : snapshot = estate->es_snapshot;
331 CBC 573 : heapRelation = node->ss.ss_currentRelation;
332 573 : slot = node->ss.ss_ScanTupleSlot;
333 ECB :
334 : /*
335 : * First time through, compute the list of TIDs to be visited
336 : */
337 GIC 573 : if (node->tss_TidList == NULL)
338 314 : TidListEval(node);
339 :
340 CBC 543 : scan = node->ss.ss_currentScanDesc;
341 543 : tidList = node->tss_TidList;
342 GIC 543 : numTids = node->tss_NumTids;
343 ECB :
344 : /*
345 : * Initialize or advance scan position, depending on direction.
346 : */
347 GIC 543 : bBackward = ScanDirectionIsBackward(direction);
348 543 : if (bBackward)
349 : {
350 CBC 3 : if (node->tss_TidPtr < 0)
351 ECB : {
352 : /* initialize for backward scan */
353 LBC 0 : node->tss_TidPtr = numTids - 1;
354 : }
355 : else
356 GBC 3 : node->tss_TidPtr--;
357 : }
358 : else
359 ECB : {
360 GIC 540 : if (node->tss_TidPtr < 0)
361 : {
362 : /* initialize for forward scan */
363 CBC 284 : node->tss_TidPtr = 0;
364 : }
365 : else
366 256 : node->tss_TidPtr++;
367 : }
368 :
369 558 : while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
370 : {
371 GIC 277 : ItemPointerData tid = tidList[node->tss_TidPtr];
372 ECB :
373 : /*
374 : * For WHERE CURRENT OF, the tuple retrieved from the cursor might
375 : * since have been updated; if so, we should fetch the version that is
376 : * current according to our snapshot.
377 : */
378 GIC 277 : if (node->tss_isCurrentOf)
379 138 : table_tuple_get_latest_tid(scan, &tid);
380 :
381 CBC 277 : if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
382 262 : return slot;
383 :
384 ECB : /* Bad TID or failed snapshot qual; try next */
385 CBC 15 : if (bBackward)
386 UIC 0 : node->tss_TidPtr--;
387 : else
388 CBC 15 : node->tss_TidPtr++;
389 EUB :
390 GIC 15 : CHECK_FOR_INTERRUPTS();
391 ECB : }
392 :
393 : /*
394 : * if we get here it means the tid scan failed so we are at the end of the
395 : * scan..
396 : */
397 GIC 281 : return ExecClearTuple(slot);
398 : }
399 :
400 ECB : /*
401 : * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
402 : */
403 : static bool
404 UIC 0 : TidRecheck(TidScanState *node, TupleTableSlot *slot)
405 : {
406 : /*
407 EUB : * XXX shouldn't we check here to make sure tuple matches TID list? In
408 : * runtime-key case this is not certain, is it? However, in the WHERE
409 : * CURRENT OF case it might not match anyway ...
410 : */
411 UIC 0 : return true;
412 : }
413 :
414 EUB :
415 : /* ----------------------------------------------------------------
416 : * ExecTidScan(node)
417 : *
418 : * Scans the relation using tids and returns
419 : * the next qualifying tuple in the direction specified.
420 : * We call the ExecScan() routine and pass it the appropriate
421 : * access method functions.
422 : *
423 : * Conditions:
424 : * -- the "cursor" maintained by the AMI is positioned at the tuple
425 : * returned previously.
426 : *
427 : * Initial States:
428 : * -- the relation indicated is opened for scanning so that the
429 : * "cursor" is positioned before the first qualifying tuple.
430 : * -- tss_TidPtr is -1.
431 : * ----------------------------------------------------------------
432 : */
433 : static TupleTableSlot *
434 GIC 564 : ExecTidScan(PlanState *pstate)
435 : {
436 564 : TidScanState *node = castNode(TidScanState, pstate);
437 ECB :
438 GIC 564 : return ExecScan(&node->ss,
439 ECB : (ExecScanAccessMtd) TidNext,
440 : (ExecScanRecheckMtd) TidRecheck);
441 : }
442 :
443 : /* ----------------------------------------------------------------
444 : * ExecReScanTidScan(node)
445 : * ----------------------------------------------------------------
446 : */
447 : void
448 GIC 9 : ExecReScanTidScan(TidScanState *node)
449 : {
450 9 : if (node->tss_TidList)
451 CBC 3 : pfree(node->tss_TidList);
452 GIC 9 : node->tss_TidList = NULL;
453 CBC 9 : node->tss_NumTids = 0;
454 9 : node->tss_TidPtr = -1;
455 ECB :
456 : /* not really necessary, but seems good form */
457 CBC 9 : if (node->ss.ss_currentScanDesc)
458 GIC 3 : table_rescan(node->ss.ss_currentScanDesc, NULL);
459 :
460 CBC 9 : ExecScanReScan(&node->ss);
461 9 : }
462 :
463 ECB : /* ----------------------------------------------------------------
464 : * ExecEndTidScan
465 : *
466 : * Releases any storage allocated through C routines.
467 : * Returns nothing.
468 : * ----------------------------------------------------------------
469 : */
470 : void
471 GIC 299 : ExecEndTidScan(TidScanState *node)
472 : {
473 299 : if (node->ss.ss_currentScanDesc)
474 CBC 275 : table_endscan(node->ss.ss_currentScanDesc);
475 :
476 ECB : /*
477 : * Free the exprcontext
478 : */
479 GIC 299 : ExecFreeExprContext(&node->ss.ps);
480 :
481 : /*
482 ECB : * clear out tuple table slots
483 : */
484 GIC 299 : if (node->ss.ps.ps_ResultTupleSlot)
485 293 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
486 299 : ExecClearTuple(node->ss.ss_ScanTupleSlot);
487 CBC 299 : }
488 ECB :
489 : /* ----------------------------------------------------------------
490 : * ExecInitTidScan
491 : *
492 : * Initializes the tid scan's state information, creates
493 : * scan keys, and opens the base and tid relations.
494 : *
495 : * Parameters:
496 : * node: TidScan node produced by the planner.
497 : * estate: the execution state initialized in InitPlan.
498 : * ----------------------------------------------------------------
499 : */
500 : TidScanState *
501 GIC 359 : ExecInitTidScan(TidScan *node, EState *estate, int eflags)
502 : {
503 : TidScanState *tidstate;
504 ECB : Relation currentRelation;
505 :
506 : /*
507 : * create state structure
508 : */
509 GIC 359 : tidstate = makeNode(TidScanState);
510 359 : tidstate->ss.ps.plan = (Plan *) node;
511 359 : tidstate->ss.ps.state = estate;
512 CBC 359 : tidstate->ss.ps.ExecProcNode = ExecTidScan;
513 ECB :
514 : /*
515 : * Miscellaneous initialization
516 : *
517 : * create expression context for node
518 : */
519 GIC 359 : ExecAssignExprContext(estate, &tidstate->ss.ps);
520 :
521 : /*
522 ECB : * mark tid list as not computed yet
523 : */
524 GIC 359 : tidstate->tss_TidList = NULL;
525 359 : tidstate->tss_NumTids = 0;
526 359 : tidstate->tss_TidPtr = -1;
527 ECB :
528 : /*
529 : * open the scan relation
530 : */
531 GIC 359 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
532 :
533 359 : tidstate->ss.ss_currentRelation = currentRelation;
534 CBC 359 : tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
535 :
536 ECB : /*
537 : * get the scan type from the relation descriptor.
538 : */
539 GIC 359 : ExecInitScanTupleSlot(estate, &tidstate->ss,
540 : RelationGetDescr(currentRelation),
541 : table_slot_callbacks(currentRelation));
542 ECB :
543 : /*
544 : * Initialize result type and projection.
545 : */
546 GIC 359 : ExecInitResultTypeTL(&tidstate->ss.ps);
547 359 : ExecAssignScanProjectionInfo(&tidstate->ss);
548 :
549 ECB : /*
550 : * initialize child expressions
551 : */
552 GIC 359 : tidstate->ss.ps.qual =
553 359 : ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
554 :
555 CBC 359 : TidExprListCreate(tidstate);
556 ECB :
557 : /*
558 : * all done.
559 : */
560 GIC 359 : return tidstate;
561 : }
|