Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * nodeBitmapHeapscan.c
4 : * Routines to support bitmapped scans of relations
5 : *
6 : * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 : * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 : * special snapshots). The reason is that since index and heap scans are
9 : * decoupled, there can be no assurance that the index tuple prompting a
10 : * visit to a particular heap TID still exists when the visit is made.
11 : * Therefore the tuple might not exist anymore either (which is OK because
12 : * heap_fetch will cope) --- but worse, the tuple slot could have been
13 : * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 : * certain to fail the time qual and so it will not be mistakenly returned,
15 : * but with anything else we might return a tuple that doesn't meet the
16 : * required index qual conditions.
17 : *
18 : *
19 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
20 : * Portions Copyright (c) 1994, Regents of the University of California
21 : *
22 : *
23 : * IDENTIFICATION
24 : * src/backend/executor/nodeBitmapHeapscan.c
25 : *
26 : *-------------------------------------------------------------------------
27 : */
28 : /*
29 : * INTERFACE ROUTINES
30 : * ExecBitmapHeapScan scans a relation using bitmap info
31 : * ExecBitmapHeapNext workhorse for above
32 : * ExecInitBitmapHeapScan creates and initializes state info.
33 : * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 : * ExecEndBitmapHeapScan releases all storage.
35 : */
36 : #include "postgres.h"
37 :
38 : #include <math.h>
39 :
40 : #include "access/relscan.h"
41 : #include "access/tableam.h"
42 : #include "access/transam.h"
43 : #include "access/visibilitymap.h"
44 : #include "executor/execdebug.h"
45 : #include "executor/nodeBitmapHeapscan.h"
46 : #include "miscadmin.h"
47 : #include "pgstat.h"
48 : #include "storage/bufmgr.h"
49 : #include "storage/predicate.h"
50 : #include "utils/memutils.h"
51 : #include "utils/rel.h"
52 : #include "utils/snapmgr.h"
53 : #include "utils/spccache.h"
54 :
55 : static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
56 : static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
57 : static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
58 : TBMIterateResult *tbmres);
59 : static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
60 : static inline void BitmapPrefetch(BitmapHeapScanState *node,
61 : TableScanDesc scan);
62 : static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
63 :
64 :
65 : /* ----------------------------------------------------------------
66 : * BitmapHeapNext
67 : *
68 : * Retrieve next tuple from the BitmapHeapScan node's currentRelation
69 : * ----------------------------------------------------------------
70 : */
71 : static TupleTableSlot *
6564 tgl 72 CBC 3827972 : BitmapHeapNext(BitmapHeapScanState *node)
73 : {
74 : ExprContext *econtext;
75 : TableScanDesc scan;
76 : TIDBitmap *tbm;
2223 77 3827972 : TBMIterator *tbmiterator = NULL;
78 3827972 : TBMSharedIterator *shared_tbmiterator = NULL;
79 : TBMIterateResult *tbmres;
80 : TupleTableSlot *slot;
rhaas 81 3827972 : ParallelBitmapHeapState *pstate = node->pstate;
82 3827972 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
83 :
84 : /*
85 : * extract necessary information from index scan node
86 : */
6564 tgl 87 3827972 : econtext = node->ss.ps.ps_ExprContext;
88 3827972 : slot = node->ss.ss_ScanTupleSlot;
6343 89 3827972 : scan = node->ss.ss_currentScanDesc;
6564 90 3827972 : tbm = node->tbm;
2223 rhaas 91 3827972 : if (pstate == NULL)
92 3230801 : tbmiterator = node->tbmiterator;
93 : else
94 597171 : shared_tbmiterator = node->shared_tbmiterator;
6564 tgl 95 3827972 : tbmres = node->tbmres;
96 :
97 : /*
98 : * If we haven't yet performed the underlying index scan, do it, and begin
99 : * the iteration over the bitmap.
100 : *
101 : * For prefetching, we use *two* iterators, one for the pages we are
102 : * actually scanning and another that runs ahead of the first for
103 : * prefetching. node->prefetch_pages tracks exactly how many pages ahead
104 : * the prefetch iterator is. Also, node->prefetch_target tracks the
105 : * desired prefetch distance, which starts small and increases up to the
106 : * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
107 : * a scan that stops after a few tuples because of a LIMIT.
108 : */
2223 rhaas 109 3827972 : if (!node->initialized)
110 : {
111 8700 : if (!pstate)
112 : {
113 8526 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
114 :
115 8526 : if (!tbm || !IsA(tbm, TIDBitmap))
2223 rhaas 116 UBC 0 : elog(ERROR, "unrecognized result from subplan");
117 :
2223 rhaas 118 CBC 8526 : node->tbm = tbm;
119 8526 : node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
120 8526 : node->tbmres = tbmres = NULL;
121 :
122 : #ifdef USE_PREFETCH
123 8526 : if (node->prefetch_maximum > 0)
124 : {
125 8526 : node->prefetch_iterator = tbm_begin_iterate(tbm);
126 8526 : node->prefetch_pages = 0;
127 8526 : node->prefetch_target = -1;
128 : }
129 : #endif /* USE_PREFETCH */
130 : }
131 : else
132 : {
133 : /*
134 : * The leader will immediately come out of the function, but
135 : * others will be blocked until leader populates the TBM and wakes
136 : * them up.
137 : */
138 174 : if (BitmapShouldInitializeSharedState(pstate))
139 : {
140 36 : tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
141 36 : if (!tbm || !IsA(tbm, TIDBitmap))
2223 rhaas 142 UBC 0 : elog(ERROR, "unrecognized result from subplan");
143 :
2223 rhaas 144 CBC 36 : node->tbm = tbm;
145 :
146 : /*
147 : * Prepare to iterate over the TBM. This will return the
148 : * dsa_pointer of the iterator state which will be used by
149 : * multiple processes to iterate jointly.
150 : */
151 36 : pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
152 : #ifdef USE_PREFETCH
153 36 : if (node->prefetch_maximum > 0)
154 : {
155 36 : pstate->prefetch_iterator =
156 36 : tbm_prepare_shared_iterate(tbm);
157 :
158 : /*
159 : * We don't need the mutex here as we haven't yet woke up
160 : * others.
161 : */
162 36 : pstate->prefetch_pages = 0;
163 36 : pstate->prefetch_target = -1;
164 : }
165 : #endif
166 :
167 : /* We have initialized the shared state so wake up others. */
168 36 : BitmapDoneInitializingSharedState(pstate);
169 : }
170 :
171 : /* Allocate a private iterator and attach the shared state to it */
172 174 : node->shared_tbmiterator = shared_tbmiterator =
173 174 : tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
174 174 : node->tbmres = tbmres = NULL;
175 :
176 : #ifdef USE_PREFETCH
177 174 : if (node->prefetch_maximum > 0)
178 : {
179 174 : node->shared_prefetch_iterator =
180 174 : tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
181 : }
182 : #endif /* USE_PREFETCH */
183 : }
184 8700 : node->initialized = true;
185 : }
186 :
187 : for (;;)
6564 tgl 188 498342 : {
189 : bool skip_fetch;
190 :
2084 andres 191 4326314 : CHECK_FOR_INTERRUPTS();
192 :
193 : /*
194 : * Get next page of results if needed
195 : */
6564 tgl 196 4326314 : if (tbmres == NULL)
197 : {
2223 rhaas 198 273342 : if (!pstate)
199 258114 : node->tbmres = tbmres = tbm_iterate(tbmiterator);
200 : else
201 15228 : node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
6564 tgl 202 273342 : if (tbmres == NULL)
203 : {
204 : /* no more entries in the bitmap */
205 8511 : break;
206 : }
207 :
2229 rhaas 208 264831 : BitmapAdjustPrefetchIterator(node, tbmres);
209 :
210 : /*
211 : * We can skip fetching the heap page if we don't need any fields
212 : * from the heap, and the bitmap entries don't need rechecking,
213 : * and all tuples on the page are visible to our transaction.
214 : *
215 : * XXX: It's a layering violation that we do these checks above
216 : * tableam, they should probably moved below it at some point.
217 : */
1470 andres 218 571390 : skip_fetch = (node->can_skip_fetch &&
219 293275 : !tbmres->recheck &&
220 28444 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
221 : tbmres->blockno,
222 : &node->vmbuffer));
223 :
224 264831 : if (skip_fetch)
225 : {
226 : /* can't be lossy in the skip_fetch case */
227 10383 : Assert(tbmres->ntuples >= 0);
228 :
229 : /*
230 : * The number of tuples on this page is put into
231 : * node->return_empty_tuples.
232 : */
233 10383 : node->return_empty_tuples = tbmres->ntuples;
234 : }
235 254448 : else if (!table_scan_bitmap_next_block(scan, tbmres))
236 : {
237 : /* AM doesn't think this block is valid, skip */
238 4793 : continue;
239 : }
240 :
3373 rhaas 241 260035 : if (tbmres->ntuples >= 0)
242 187705 : node->exact_pages++;
243 : else
244 72330 : node->lossy_pages++;
245 :
246 : /* Adjust the prefetch target */
2229 247 260035 : BitmapAdjustPrefetchTarget(node);
248 : }
249 : else
250 : {
251 : /*
252 : * Continuing in previously obtained page.
253 : */
254 :
255 : #ifdef USE_PREFETCH
256 :
257 : /*
258 : * Try to prefetch at least a few pages even before we get to the
259 : * second page if we don't stop reading after the first tuple.
260 : */
2223 261 4052972 : if (!pstate)
262 : {
263 3466322 : if (node->prefetch_target < node->prefetch_maximum)
264 7324 : node->prefetch_target++;
265 : }
266 586650 : else if (pstate->prefetch_target < node->prefetch_maximum)
267 : {
268 : /* take spinlock while updating shared state */
269 933 : SpinLockAcquire(&pstate->mutex);
270 933 : if (pstate->prefetch_target < node->prefetch_maximum)
271 933 : pstate->prefetch_target++;
272 933 : SpinLockRelease(&pstate->mutex);
273 : }
274 : #endif /* USE_PREFETCH */
275 : }
276 :
277 : /*
278 : * We issue prefetch requests *after* fetching the current page to try
279 : * to avoid having prefetching interfere with the main I/O. Also, this
280 : * should happen only when we have determined there is still something
281 : * to do on the current page, else we may uselessly prefetch the same
282 : * page we are just about to request for real.
283 : *
284 : * XXX: It's a layering violation that we do these checks above
285 : * tableam, they should probably moved below it at some point.
286 : */
2229 287 4313007 : BitmapPrefetch(node, scan);
288 :
1470 andres 289 4313007 : if (node->return_empty_tuples > 0)
290 : {
291 : /*
292 : * If we don't have to fetch the tuple, just return nulls.
293 : */
1985 tgl 294 294048 : ExecStoreAllNullTuple(slot);
295 :
1470 andres 296 294048 : if (--node->return_empty_tuples == 0)
297 : {
298 : /* no more tuples to return in the next round */
299 10383 : node->tbmres = tbmres = NULL;
300 : }
301 : }
302 : else
303 : {
304 : /*
305 : * Attempt to fetch tuple from AM.
306 : */
307 4018959 : if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
308 : {
309 : /* nothing more to look at on this page */
310 254259 : node->tbmres = tbmres = NULL;
311 254259 : continue;
312 : }
313 :
314 : /*
315 : * If we are using lossy info, we have to recheck the qual
316 : * conditions at every tuple.
317 : */
1985 tgl 318 3764700 : if (tbmres->recheck)
319 : {
320 2493977 : econtext->ecxt_scantuple = slot;
1896 andres 321 2493977 : if (!ExecQualAndReset(node->bitmapqualorig, econtext))
322 : {
323 : /* Fails recheck, so drop it and loop back for another */
1985 tgl 324 239290 : InstrCountFiltered2(node, 1);
325 239290 : ExecClearTuple(slot);
326 239290 : continue;
327 : }
328 : }
329 : }
330 :
331 : /* OK to return this tuple */
6343 332 3819458 : return slot;
333 : }
334 :
335 : /*
336 : * if we get here it means we are at the end of the scan..
337 : */
338 8511 : return ExecClearTuple(slot);
339 : }
340 :
341 : /*
342 : * BitmapDoneInitializingSharedState - Shared state is initialized
343 : *
344 : * By this time the leader has already populated the TBM and initialized the
345 : * shared state so wake up other processes.
346 : */
347 : static inline void
2223 rhaas 348 36 : BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
349 : {
350 36 : SpinLockAcquire(&pstate->mutex);
351 36 : pstate->state = BM_FINISHED;
352 36 : SpinLockRelease(&pstate->mutex);
353 36 : ConditionVariableBroadcast(&pstate->cv);
354 36 : }
355 :
356 : /*
357 : * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
358 : */
359 : static inline void
2229 360 264831 : BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
361 : TBMIterateResult *tbmres)
362 : {
363 : #ifdef USE_PREFETCH
2223 364 264831 : ParallelBitmapHeapState *pstate = node->pstate;
365 :
366 264831 : if (pstate == NULL)
367 : {
368 249777 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
369 :
370 249777 : if (node->prefetch_pages > 0)
371 : {
372 : /* The main iterator has closed the distance by one page */
373 241627 : node->prefetch_pages--;
374 : }
375 8150 : else if (prefetch_iterator)
376 : {
377 : /* Do not let the prefetch iterator get behind the main one */
378 8150 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
379 :
380 8150 : if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
2223 rhaas 381 UBC 0 : elog(ERROR, "prefetch and main iterators are out of sync");
382 : }
2223 rhaas 383 CBC 249777 : return;
384 : }
385 :
386 15054 : if (node->prefetch_maximum > 0)
387 : {
388 15054 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
389 :
390 15054 : SpinLockAcquire(&pstate->mutex);
391 15054 : if (pstate->prefetch_pages > 0)
392 : {
2196 393 15018 : pstate->prefetch_pages--;
2223 394 15018 : SpinLockRelease(&pstate->mutex);
395 : }
396 : else
397 : {
398 : /* Release the mutex before iterating */
399 36 : SpinLockRelease(&pstate->mutex);
400 :
401 : /*
402 : * In case of shared mode, we can not ensure that the current
403 : * blockno of the main iterator and that of the prefetch iterator
404 : * are same. It's possible that whatever blockno we are
405 : * prefetching will be processed by another process. Therefore,
406 : * we don't validate the blockno here as we do in non-parallel
407 : * case.
408 : */
409 36 : if (prefetch_iterator)
410 36 : tbm_shared_iterate(prefetch_iterator);
411 : }
412 : }
413 : #endif /* USE_PREFETCH */
414 : }
415 :
416 : /*
417 : * BitmapAdjustPrefetchTarget - Adjust the prefetch target
418 : *
419 : * Increase prefetch target if it's not yet at the max. Note that
420 : * we will increase it to zero after fetching the very first
421 : * page/tuple, then to one after the second tuple is fetched, then
422 : * it doubles as later pages are fetched.
423 : */
424 : static inline void
2229 425 260035 : BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
426 : {
427 : #ifdef USE_PREFETCH
2223 428 260035 : ParallelBitmapHeapState *pstate = node->pstate;
429 :
430 260035 : if (pstate == NULL)
431 : {
432 244981 : if (node->prefetch_target >= node->prefetch_maximum)
433 : /* don't increase any further */ ;
434 7420 : else if (node->prefetch_target >= node->prefetch_maximum / 2)
435 635 : node->prefetch_target = node->prefetch_maximum;
436 6785 : else if (node->prefetch_target > 0)
2223 rhaas 437 UBC 0 : node->prefetch_target *= 2;
438 : else
2223 rhaas 439 CBC 6785 : node->prefetch_target++;
440 244981 : return;
441 : }
442 :
443 : /* Do an unlocked check first to save spinlock acquisitions. */
444 15054 : if (pstate->prefetch_target < node->prefetch_maximum)
445 : {
446 66 : SpinLockAcquire(&pstate->mutex);
447 66 : if (pstate->prefetch_target >= node->prefetch_maximum)
448 : /* don't increase any further */ ;
449 66 : else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
450 30 : pstate->prefetch_target = node->prefetch_maximum;
451 36 : else if (pstate->prefetch_target > 0)
2223 rhaas 452 UBC 0 : pstate->prefetch_target *= 2;
453 : else
2223 rhaas 454 CBC 36 : pstate->prefetch_target++;
455 66 : SpinLockRelease(&pstate->mutex);
456 : }
457 : #endif /* USE_PREFETCH */
458 : }
459 :
460 : /*
461 : * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
462 : */
463 : static inline void
1490 andres 464 4313007 : BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
465 : {
466 : #ifdef USE_PREFETCH
2223 rhaas 467 4313007 : ParallelBitmapHeapState *pstate = node->pstate;
468 :
469 4313007 : if (pstate == NULL)
470 : {
471 3711303 : TBMIterator *prefetch_iterator = node->prefetch_iterator;
472 :
473 3711303 : if (prefetch_iterator)
474 : {
475 7576753 : while (node->prefetch_pages < node->prefetch_target)
476 : {
477 248883 : TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
478 : bool skip_fetch;
479 :
480 248883 : if (tbmpre == NULL)
481 : {
482 : /* No more pages to prefetch */
483 7256 : tbm_end_iterate(prefetch_iterator);
484 7256 : node->prefetch_iterator = NULL;
485 7256 : break;
486 : }
487 241627 : node->prefetch_pages++;
488 :
489 : /*
490 : * If we expect not to have to actually read this heap page,
491 : * skip this prefetch call, but continue to run the prefetch
492 : * logic normally. (Would it be better not to increment
493 : * prefetch_pages?)
494 : *
495 : * This depends on the assumption that the index AM will
496 : * report the same recheck flag for this future heap page as
497 : * it did for the current heap page; which is not a certainty
498 : * but is true in many cases.
499 : */
1985 tgl 500 536408 : skip_fetch = (node->can_skip_fetch &&
501 257054 : (node->tbmres ? !node->tbmres->recheck : false) &&
502 15427 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
503 : tbmpre->blockno,
504 : &node->pvmbuffer));
505 :
506 241627 : if (!skip_fetch)
507 241603 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
508 : }
509 : }
510 :
2223 rhaas 511 3711303 : return;
512 : }
513 :
514 601704 : if (pstate->prefetch_pages < pstate->prefetch_target)
515 : {
516 68864 : TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
517 :
518 68864 : if (prefetch_iterator)
519 : {
520 : while (1)
2229 521 15018 : {
522 : TBMIterateResult *tbmpre;
2223 523 30320 : bool do_prefetch = false;
524 : bool skip_fetch;
525 :
526 : /*
527 : * Recheck under the mutex. If some other process has already
528 : * done enough prefetching then we need not to do anything.
529 : */
530 30320 : SpinLockAcquire(&pstate->mutex);
531 30320 : if (pstate->prefetch_pages < pstate->prefetch_target)
532 : {
533 15066 : pstate->prefetch_pages++;
534 15066 : do_prefetch = true;
535 : }
536 30320 : SpinLockRelease(&pstate->mutex);
537 :
538 30320 : if (!do_prefetch)
539 15254 : return;
540 :
541 15066 : tbmpre = tbm_shared_iterate(prefetch_iterator);
542 15066 : if (tbmpre == NULL)
543 : {
544 : /* No more pages to prefetch */
545 48 : tbm_end_shared_iterate(prefetch_iterator);
546 48 : node->shared_prefetch_iterator = NULL;
547 48 : break;
548 : }
549 :
550 : /* As above, skip prefetch if we expect not to need page */
1985 tgl 551 58080 : skip_fetch = (node->can_skip_fetch &&
552 27278 : (node->tbmres ? !node->tbmres->recheck : false) &&
553 12260 : VM_ALL_VISIBLE(node->ss.ss_currentRelation,
554 : tbmpre->blockno,
555 : &node->pvmbuffer));
556 :
557 15018 : if (!skip_fetch)
558 4698 : PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
559 : }
560 : }
561 : }
562 : #endif /* USE_PREFETCH */
563 : }
564 :
565 : /*
566 : * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
567 : */
568 : static bool
4913 tgl 569 UBC 0 : BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
570 : {
571 : ExprContext *econtext;
572 :
573 : /*
574 : * extract necessary information from index scan node
575 : */
576 0 : econtext = node->ss.ps.ps_ExprContext;
577 :
578 : /* Does the tuple meet the original qual conditions? */
579 0 : econtext->ecxt_scantuple = slot;
1896 andres 580 0 : return ExecQualAndReset(node->bitmapqualorig, econtext);
581 : }
582 :
583 : /* ----------------------------------------------------------------
584 : * ExecBitmapHeapScan(node)
585 : * ----------------------------------------------------------------
586 : */
587 : static TupleTableSlot *
2092 andres 588 CBC 3725555 : ExecBitmapHeapScan(PlanState *pstate)
589 : {
590 3725555 : BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
591 :
4913 tgl 592 3725555 : return ExecScan(&node->ss,
593 : (ExecScanAccessMtd) BitmapHeapNext,
594 : (ExecScanRecheckMtd) BitmapHeapRecheck);
595 : }
596 :
597 : /* ----------------------------------------------------------------
598 : * ExecReScanBitmapHeapScan(node)
599 : * ----------------------------------------------------------------
600 : */
601 : void
4654 602 1720 : ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
603 : {
2878 bruce 604 1720 : PlanState *outerPlan = outerPlanState(node);
605 :
606 : /* rescan to release any page pin */
1490 andres 607 1720 : table_rescan(node->ss.ss_currentScanDesc, NULL);
608 :
609 : /* release bitmaps and buffers if any */
5202 tgl 610 1720 : if (node->tbmiterator)
611 1467 : tbm_end_iterate(node->tbmiterator);
5200 612 1720 : if (node->prefetch_iterator)
613 560 : tbm_end_iterate(node->prefetch_iterator);
2223 rhaas 614 1720 : if (node->shared_tbmiterator)
615 27 : tbm_end_shared_iterate(node->shared_tbmiterator);
616 1720 : if (node->shared_prefetch_iterator)
2223 rhaas 617 UBC 0 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
6564 tgl 618 CBC 1720 : if (node->tbm)
619 1494 : tbm_free(node->tbm);
1985 620 1720 : if (node->vmbuffer != InvalidBuffer)
621 27 : ReleaseBuffer(node->vmbuffer);
622 1720 : if (node->pvmbuffer != InvalidBuffer)
623 27 : ReleaseBuffer(node->pvmbuffer);
6564 624 1720 : node->tbm = NULL;
5202 625 1720 : node->tbmiterator = NULL;
6564 626 1720 : node->tbmres = NULL;
5200 627 1720 : node->prefetch_iterator = NULL;
2223 rhaas 628 1720 : node->initialized = false;
629 1720 : node->shared_tbmiterator = NULL;
630 1720 : node->shared_prefetch_iterator = NULL;
1985 tgl 631 1720 : node->vmbuffer = InvalidBuffer;
632 1720 : node->pvmbuffer = InvalidBuffer;
633 :
4913 634 1720 : ExecScanReScan(&node->ss);
635 :
636 : /*
637 : * if chgParam of subnode is not null then plan will be re-scanned by
638 : * first ExecProcNode.
639 : */
2897 rhaas 640 1720 : if (outerPlan->chgParam == NULL)
641 77 : ExecReScan(outerPlan);
6564 tgl 642 1720 : }
643 :
644 : /* ----------------------------------------------------------------
645 : * ExecEndBitmapHeapScan
646 : * ----------------------------------------------------------------
647 : */
648 : void
649 10567 : ExecEndBitmapHeapScan(BitmapHeapScanState *node)
650 : {
651 : TableScanDesc scanDesc;
652 :
653 : /*
654 : * extract information from the node
655 : */
656 10567 : scanDesc = node->ss.ss_currentScanDesc;
657 :
658 : /*
659 : * Free the exprcontext
660 : */
661 10567 : ExecFreeExprContext(&node->ss.ps);
662 :
663 : /*
664 : * clear out tuple table slots
665 : */
1612 andres 666 10567 : if (node->ss.ps.ps_ResultTupleSlot)
667 8680 : ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
6564 tgl 668 10567 : ExecClearTuple(node->ss.ss_ScanTupleSlot);
669 :
670 : /*
671 : * close down subplans
672 : */
673 10567 : ExecEndNode(outerPlanState(node));
674 :
675 : /*
676 : * release bitmaps and buffers if any
677 : */
5202 678 10567 : if (node->tbmiterator)
679 7028 : tbm_end_iterate(node->tbmiterator);
5200 680 10567 : if (node->prefetch_iterator)
681 704 : tbm_end_iterate(node->prefetch_iterator);
6564 682 10567 : if (node->tbm)
683 7037 : tbm_free(node->tbm);
2223 rhaas 684 10567 : if (node->shared_tbmiterator)
685 147 : tbm_end_shared_iterate(node->shared_tbmiterator);
686 10567 : if (node->shared_prefetch_iterator)
687 126 : tbm_end_shared_iterate(node->shared_prefetch_iterator);
1985 tgl 688 10567 : if (node->vmbuffer != InvalidBuffer)
689 15 : ReleaseBuffer(node->vmbuffer);
690 10567 : if (node->pvmbuffer != InvalidBuffer)
691 12 : ReleaseBuffer(node->pvmbuffer);
692 :
693 : /*
694 : * close heap scan
695 : */
1490 andres 696 10567 : table_endscan(scanDesc);
6564 tgl 697 10567 : }
698 :
699 : /* ----------------------------------------------------------------
700 : * ExecInitBitmapHeapScan
701 : *
702 : * Initializes the scan's state information.
703 : * ----------------------------------------------------------------
704 : */
705 : BitmapHeapScanState *
6249 706 10598 : ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
707 : {
708 : BitmapHeapScanState *scanstate;
709 : Relation currentRelation;
710 :
711 : /* check for unsupported flags */
712 10598 : Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
713 :
714 : /*
715 : * Assert caller didn't ask for an unsafe snapshot --- see comments at
716 : * head of file.
717 : */
6343 718 10598 : Assert(IsMVCCSnapshot(estate->es_snapshot));
719 :
720 : /*
721 : * create state structure
722 : */
6564 723 10598 : scanstate = makeNode(BitmapHeapScanState);
724 10598 : scanstate->ss.ps.plan = (Plan *) node;
725 10598 : scanstate->ss.ps.state = estate;
2092 andres 726 10598 : scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
727 :
6564 tgl 728 10598 : scanstate->tbm = NULL;
5202 729 10598 : scanstate->tbmiterator = NULL;
6564 730 10598 : scanstate->tbmres = NULL;
1470 andres 731 10598 : scanstate->return_empty_tuples = 0;
1985 tgl 732 10598 : scanstate->vmbuffer = InvalidBuffer;
733 10598 : scanstate->pvmbuffer = InvalidBuffer;
3373 rhaas 734 10598 : scanstate->exact_pages = 0;
735 10598 : scanstate->lossy_pages = 0;
5200 tgl 736 10598 : scanstate->prefetch_iterator = NULL;
737 10598 : scanstate->prefetch_pages = 0;
738 10598 : scanstate->prefetch_target = 0;
2223 rhaas 739 10598 : scanstate->pscan_len = 0;
740 10598 : scanstate->initialized = false;
741 10598 : scanstate->shared_tbmiterator = NULL;
1985 tgl 742 10598 : scanstate->shared_prefetch_iterator = NULL;
2223 rhaas 743 10598 : scanstate->pstate = NULL;
744 :
745 : /*
746 : * We can potentially skip fetching heap pages if we do not need any
747 : * columns of the table, either for checking non-indexable quals or for
748 : * returning data. This test is a bit simplistic, as it checks the
749 : * stronger condition that there's no qual or return tlist at all. But in
750 : * most cases it's probably not worth working harder than that.
751 : */
1985 tgl 752 20222 : scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
753 9624 : node->scan.plan.targetlist == NIL);
754 :
755 : /*
756 : * Miscellaneous initialization
757 : *
758 : * create expression context for node
759 : */
6564 760 10598 : ExecAssignExprContext(estate, &scanstate->ss.ps);
761 :
762 : /*
763 : * open the scan relation
764 : */
1878 andres 765 10598 : currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
766 :
767 : /*
768 : * initialize child nodes
769 : */
770 10598 : outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
771 :
772 : /*
773 : * get the scan type from the relation descriptor.
774 : */
775 10598 : ExecInitScanTupleSlot(estate, &scanstate->ss,
776 : RelationGetDescr(currentRelation),
777 : table_slot_callbacks(currentRelation));
778 :
779 : /*
780 : * Initialize result type and projection.
781 : */
1612 782 10598 : ExecInitResultTypeTL(&scanstate->ss.ps);
1878 783 10598 : ExecAssignScanProjectionInfo(&scanstate->ss);
784 :
785 : /*
786 : * initialize child expressions
787 : */
788 10598 : scanstate->ss.ps.qual =
789 10598 : ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
790 10598 : scanstate->bitmapqualorig =
791 10598 : ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
792 :
793 : /*
794 : * Maximum number of prefetches for the tablespace if configured,
795 : * otherwise the current value of the effective_io_concurrency GUC.
796 : */
1119 tmunro 797 10598 : scanstate->prefetch_maximum =
2770 alvherre 798 10598 : get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
799 :
6564 tgl 800 10598 : scanstate->ss.ss_currentRelation = currentRelation;
801 :
1490 andres 802 10598 : scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
803 : estate->es_snapshot,
804 : 0,
805 : NULL);
806 :
807 : /*
808 : * all done.
809 : */
6564 tgl 810 10598 : return scanstate;
811 : }
812 :
813 : /*----------------
814 : * BitmapShouldInitializeSharedState
815 : *
816 : * The first process to come here and see the state to the BM_INITIAL
817 : * will become the leader for the parallel bitmap scan and will be
818 : * responsible for populating the TIDBitmap. The other processes will
819 : * be blocked by the condition variable until the leader wakes them up.
820 : * ---------------
821 : */
822 : static bool
2223 rhaas 823 174 : BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
824 : {
825 : SharedBitmapState state;
826 :
827 : while (1)
828 : {
829 174 : SpinLockAcquire(&pstate->mutex);
830 174 : state = pstate->state;
831 174 : if (pstate->state == BM_INITIAL)
832 36 : pstate->state = BM_INPROGRESS;
833 174 : SpinLockRelease(&pstate->mutex);
834 :
835 : /* Exit if bitmap is done, or if we're the leader. */
836 174 : if (state != BM_INPROGRESS)
837 174 : break;
838 :
839 : /* Wait for the leader to wake us up. */
2223 rhaas 840 UBC 0 : ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
841 : }
842 :
2223 rhaas 843 CBC 174 : ConditionVariableCancelSleep();
844 :
845 174 : return (state == BM_INITIAL);
846 : }
847 :
848 : /* ----------------------------------------------------------------
849 : * ExecBitmapHeapEstimate
850 : *
851 : * Compute the amount of space we'll need in the parallel
852 : * query DSM, and inform pcxt->estimator about our needs.
853 : * ----------------------------------------------------------------
854 : */
855 : void
856 9 : ExecBitmapHeapEstimate(BitmapHeapScanState *node,
857 : ParallelContext *pcxt)
858 : {
859 9 : EState *estate = node->ss.ps.state;
860 :
861 9 : node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
862 : phs_snapshot_data),
863 : EstimateSnapshotSpace(estate->es_snapshot));
864 :
865 9 : shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
866 9 : shm_toc_estimate_keys(&pcxt->estimator, 1);
867 9 : }
868 :
869 : /* ----------------------------------------------------------------
870 : * ExecBitmapHeapInitializeDSM
871 : *
872 : * Set up a parallel bitmap heap scan descriptor.
873 : * ----------------------------------------------------------------
874 : */
875 : void
876 9 : ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
877 : ParallelContext *pcxt)
878 : {
879 : ParallelBitmapHeapState *pstate;
880 9 : EState *estate = node->ss.ps.state;
1958 881 9 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
882 :
883 : /* If there's no DSA, there are no workers; initialize nothing. */
884 9 : if (dsa == NULL)
1958 rhaas 885 UBC 0 : return;
886 :
2223 rhaas 887 CBC 9 : pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
888 :
889 9 : pstate->tbmiterator = 0;
890 9 : pstate->prefetch_iterator = 0;
891 :
892 : /* Initialize the mutex */
893 9 : SpinLockInit(&pstate->mutex);
894 9 : pstate->prefetch_pages = 0;
895 9 : pstate->prefetch_target = 0;
896 9 : pstate->state = BM_INITIAL;
897 :
898 9 : ConditionVariableInit(&pstate->cv);
899 9 : SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
900 :
901 9 : shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
902 9 : node->pstate = pstate;
903 : }
904 :
905 : /* ----------------------------------------------------------------
906 : * ExecBitmapHeapReInitializeDSM
907 : *
908 : * Reset shared state before beginning a fresh scan.
909 : * ----------------------------------------------------------------
910 : */
911 : void
2048 tgl 912 27 : ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
913 : ParallelContext *pcxt)
914 : {
915 27 : ParallelBitmapHeapState *pstate = node->pstate;
916 27 : dsa_area *dsa = node->ss.ps.state->es_query_dsa;
917 :
918 : /* If there's no DSA, there are no workers; do nothing. */
1958 rhaas 919 27 : if (dsa == NULL)
1958 rhaas 920 UBC 0 : return;
921 :
2048 tgl 922 CBC 27 : pstate->state = BM_INITIAL;
923 :
924 27 : if (DsaPointerIsValid(pstate->tbmiterator))
925 27 : tbm_free_shared_area(dsa, pstate->tbmiterator);
926 :
927 27 : if (DsaPointerIsValid(pstate->prefetch_iterator))
928 27 : tbm_free_shared_area(dsa, pstate->prefetch_iterator);
929 :
930 27 : pstate->tbmiterator = InvalidDsaPointer;
931 27 : pstate->prefetch_iterator = InvalidDsaPointer;
932 : }
933 :
934 : /* ----------------------------------------------------------------
935 : * ExecBitmapHeapInitializeWorker
936 : *
937 : * Copy relevant information from TOC into planstate.
938 : * ----------------------------------------------------------------
939 : */
940 : void
1970 andres 941 138 : ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
942 : ParallelWorkerContext *pwcxt)
943 : {
944 : ParallelBitmapHeapState *pstate;
945 : Snapshot snapshot;
946 :
1958 rhaas 947 138 : Assert(node->ss.ps.state->es_query_dsa != NULL);
948 :
1970 andres 949 138 : pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
2223 rhaas 950 138 : node->pstate = pstate;
951 :
952 138 : snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
1490 andres 953 138 : table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
2223 rhaas 954 138 : }
|