Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * hashsearch.c
4 : * search code for postgres hash tables
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/hash/hashsearch.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/hash.h"
18 : #include "access/relscan.h"
19 : #include "miscadmin.h"
20 : #include "pgstat.h"
21 : #include "storage/predicate.h"
22 : #include "utils/rel.h"
23 :
24 : static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
25 : ScanDirection dir);
26 : static int _hash_load_qualified_items(IndexScanDesc scan, Page page,
27 : OffsetNumber offnum, ScanDirection dir);
28 : static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
29 : OffsetNumber offnum, IndexTuple itup);
30 : static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
31 : Page *pagep, HashPageOpaque *opaquep);
32 :
33 : /*
34 : * _hash_next() -- Get the next item in a scan.
35 : *
36 : * On entry, so->currPos describes the current page, which may
37 : * be pinned but not locked, and so->currPos.itemIndex identifies
38 : * which item was previously returned.
39 : *
40 : * On successful exit, scan->xs_heaptid is set to the TID
41 : * of the next heap tuple. so->currPos is updated as needed.
42 : *
43 : * On failure exit (no more tuples), we return false with pin
44 : * held on bucket page but no pins or locks held on overflow
45 : * page.
46 : */
47 : bool
9770 scrappy 48 CBC 50577 : _hash_next(IndexScanDesc scan, ScanDirection dir)
49 : {
7157 tgl 50 50577 : Relation rel = scan->indexRelation;
51 50577 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
52 : HashScanPosItem *currItem;
53 : BlockNumber blkno;
54 : Buffer buf;
2025 rhaas 55 50577 : bool end_of_scan = false;
56 :
57 : /*
58 : * Advance to the next tuple on the current page; or, if we are done with
59 : * this page, try to read data from the next or previous page based on the
60 : * scan direction. Before moving to another page, make sure we deal with
61 : * all the killed items.
62 : */
63 50577 : if (ScanDirectionIsForward(dir))
64 : {
65 34077 : if (++so->currPos.itemIndex > so->currPos.lastItem)
66 : {
67 295 : if (so->numKilled > 0)
2025 rhaas 68 UBC 0 : _hash_kill_items(scan);
69 :
2025 rhaas 70 CBC 295 : blkno = so->currPos.nextPage;
71 295 : if (BlockNumberIsValid(blkno))
72 : {
73 78 : buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
74 78 : TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
75 78 : if (!_hash_readpage(scan, &buf, dir))
2025 rhaas 76 UBC 0 : end_of_scan = true;
77 : }
78 : else
2025 rhaas 79 CBC 217 : end_of_scan = true;
80 : }
81 : }
82 : else
83 : {
84 16500 : if (--so->currPos.itemIndex < so->currPos.firstItem)
85 : {
86 42 : if (so->numKilled > 0)
2025 rhaas 87 UBC 0 : _hash_kill_items(scan);
88 :
2025 rhaas 89 CBC 42 : blkno = so->currPos.prevPage;
90 42 : if (BlockNumberIsValid(blkno))
91 : {
92 39 : buf = _hash_getbuf(rel, blkno, HASH_READ,
93 : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
94 39 : TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
95 :
96 : /*
97 : * We always maintain the pin on the bucket page for the whole
98 : * scan operation, so release the additional pin we have
99 : * acquired here.
100 : */
101 39 : if (buf == so->hashso_bucket_buf ||
102 36 : buf == so->hashso_split_bucket_buf)
103 3 : _hash_dropbuf(rel, buf);
104 :
105 39 : if (!_hash_readpage(scan, &buf, dir))
2025 rhaas 106 UBC 0 : end_of_scan = true;
107 : }
108 : else
2025 rhaas 109 CBC 3 : end_of_scan = true;
110 : }
111 : }
112 :
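 : /* No more matching tuples: clean up the scan's buffers and mark the scan position invalid before returning. */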
113 50577 : if (end_of_scan)
114 : {
115 220 : _hash_dropscanbuf(rel, so);
116 220 : HashScanPosInvalidate(so->currPos);
7629 tgl 117 220 : return false;
118 : }
119 :
120 : /* OK, itemIndex says what to return */
2025 rhaas 121 50357 : currItem = &so->currPos.items[so->currPos.itemIndex];
1490 andres 122 50357 : scan->xs_heaptid = currItem->heapTid;
123 :
7629 tgl 124 50357 : return true;
125 : }
126 :
127 : /*
128 : * Advance to the next page in a bucket, if any. If we are scanning the
129 : * bucket being populated during a split, then after its last page this
130 : * function advances to the bucket being split.
131 : */
132 : static void
2321 rhaas 133 111 : _hash_readnext(IndexScanDesc scan,
134 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
135 : {
136 : BlockNumber blkno;
137 111 : Relation rel = scan->indexRelation;
138 111 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
139 111 : bool block_found = false;
140 :
9345 bruce 141 111 : blkno = (*opaquep)->hasho_nextblkno;
142 :
143 : /*
144 : * Retain the pin on the primary bucket page till the end of the scan.
145 : * See the comments in _hash_first for the reason the pin is retained.
146 : */
2321 rhaas 147 111 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
2298 148 75 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
149 : else
2321 150 36 : _hash_relbuf(rel, *bufp);
151 :
9345 bruce 152 111 : *bufp = InvalidBuffer;
153 : /* check for interrupts while we're not holding any buffer lock */
5087 tgl 154 111 : CHECK_FOR_INTERRUPTS();
9345 bruce 155 111 : if (BlockNumberIsValid(blkno))
156 : {
5820 tgl 157 39 : *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
2321 rhaas 158 39 : block_found = true;
159 : }
160 72 : else if (so->hashso_buc_populated && !so->hashso_buc_split)
161 : {
162 : /*
163 : * end of bucket, scan bucket being split if there was a split in
164 : * progress at the start of scan.
165 : */
2321 rhaas 166 UBC 0 : *bufp = so->hashso_split_bucket_buf;
167 :
168 : /*
169 : * The buffer for the bucket being split must be valid, as we acquire
170 : * the pin on it before the start of the scan and retain it till the end.
171 : */
172 0 : Assert(BufferIsValid(*bufp));
173 :
2298 174 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
1828 teodor 175 0 : PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);
176 :
177 : /*
178 : * setting hashso_buc_split to true indicates that we are scanning
179 : * the bucket being split.
180 : */
2321 rhaas 181 0 : so->hashso_buc_split = true;
182 :
183 0 : block_found = true;
184 : }
185 :
2321 rhaas 186 CBC 111 : if (block_found)
187 : {
2545 kgrittn 188 39 : *pagep = BufferGetPage(*bufp);
2217 rhaas 189 39 : TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
373 michael 190 39 : *opaquep = HashPageGetOpaque(*pagep);
191 : }
9770 scrappy 192 111 : }
193 :
194 : /*
195 : * Advance to the previous page in a bucket, if any. If the current scan
196 : * started during a split, then after the first page of the bucket being
197 : * split this function advances to the bucket being populated.
198 : */
199 : static void
2321 rhaas 200 UBC 0 : _hash_readprev(IndexScanDesc scan,
201 : Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
202 : {
203 : BlockNumber blkno;
204 0 : Relation rel = scan->indexRelation;
205 0 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
206 : bool haveprevblk;
207 :
9345 bruce 208 0 : blkno = (*opaquep)->hasho_prevblkno;
209 :
210 : /*
211 : * Retain the pin on the primary bucket page till the end of the scan.
212 : * See the comments in _hash_first for the reason the pin is retained.
213 : */
2321 rhaas 214 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
215 : {
2298 216 0 : LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
2252 217 0 : haveprevblk = false;
218 : }
219 : else
220 : {
2321 221 0 : _hash_relbuf(rel, *bufp);
2252 222 0 : haveprevblk = true;
223 : }
224 :
9345 bruce 225 0 : *bufp = InvalidBuffer;
226 : /* check for interrupts while we're not holding any buffer lock */
5087 tgl 227 0 : CHECK_FOR_INTERRUPTS();
228 :
2252 rhaas 229 0 : if (haveprevblk)
230 : {
231 0 : Assert(BlockNumberIsValid(blkno));
5820 tgl 232 0 : *bufp = _hash_getbuf(rel, blkno, HASH_READ,
233 : LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
2545 kgrittn 234 0 : *pagep = BufferGetPage(*bufp);
2217 rhaas 235 0 : TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
373 michael 236 0 : *opaquep = HashPageGetOpaque(*pagep);
237 :
238 : /*
239 : * We always maintain the pin on the bucket page for the whole scan
240 : * operation, so release the additional pin we have acquired here.
241 : */
2321 rhaas 242 0 : if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
243 0 : _hash_dropbuf(rel, *bufp);
244 : }
245 0 : else if (so->hashso_buc_populated && so->hashso_buc_split)
246 : {
247 : /*
248 : * end of bucket, scan bucket being populated if there was a split in
249 : * progress at the start of scan.
250 : */
251 0 : *bufp = so->hashso_bucket_buf;
252 :
253 : /*
254 : * The buffer for the bucket being populated must be valid, as we
255 : * acquire the pin on it before the start of the scan and retain it till the end.
256 : */
257 0 : Assert(BufferIsValid(*bufp));
258 :
2298 259 0 : LockBuffer(*bufp, BUFFER_LOCK_SHARE);
2321 260 0 : *pagep = BufferGetPage(*bufp);
373 michael 261 0 : *opaquep = HashPageGetOpaque(*pagep);
262 :
263 : /* move to the end of bucket chain */
2321 rhaas 264 0 : while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
265 0 : _hash_readnext(scan, bufp, pagep, opaquep);
266 :
267 : /*
268 : * setting hashso_buc_split to false indicates that we are scanning
269 : * the bucket being populated.
270 : */
271 0 : so->hashso_buc_split = false;
272 : }
9770 scrappy 273 0 : }
274 :
275 : /*
276 : * _hash_first() -- Find the first item in a scan.
277 : *
278 : * We find the first item (or, if backward scan, the last item) in the
279 : * index that satisfies the qualification associated with the scan
280 : * descriptor.
281 : *
282 : * On successful exit, data about the matching tuple(s) on the page has
283 : * been loaded into so->currPos and scan->xs_heaptid is set to the heap
284 : * TID of the current tuple. If the page containing the current index
285 : * tuple is an overflow page, both its pin and lock are released; if it
286 : * is a bucket page, it is kept pinned but not locked.
287 : *
288 : * On failure exit (no more tuples), we return false, with pin held on
289 : * bucket page but no pins or locks held on overflow page.
290 : */
291 : bool
9770 scrappy 292 CBC 293 : _hash_first(IndexScanDesc scan, ScanDirection dir)
293 : {
7157 tgl 294 293 : Relation rel = scan->indexRelation;
295 293 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
296 : ScanKey cur;
297 : uint32 hashkey;
298 : Bucket bucket;
299 : Buffer buf;
300 : Page page;
301 : HashPageOpaque opaque;
302 : HashScanPosItem *currItem;
303 :
5796 304 293 : pgstat_count_index_scan(rel);
305 :
306 : /*
307 : * We do not support hash scans with no index qualification, because we
308 : * would have to read the whole index rather than just one bucket. That
309 : * creates a whole raft of problems, since we haven't got a practical way
310 : * to lock all the buckets against splits or compactions.
311 : */
7157 312 293 : if (scan->numberOfKeys < 1)
7157 tgl 313 UBC 0 : ereport(ERROR,
314 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
315 : errmsg("hash indexes do not support whole-index scans")));
316 :
317 : /* There may be more than one index qual, but we hash only the first */
5913 tgl 318 CBC 293 : cur = &scan->keyData[0];
319 :
320 : /* We support only single-column hash indexes */
321 293 : Assert(cur->sk_attno == 1);
322 : /* And there's only one operator strategy, too */
323 293 : Assert(cur->sk_strategy == HTEqualStrategyNumber);
324 :
325 : /*
326 : * If the constant in the index qual is NULL, assume it cannot match any
327 : * items in the index.
328 : */
329 293 : if (cur->sk_flags & SK_ISNULL)
7157 tgl 330 UBC 0 : return false;
331 :
332 : /*
333 : * Okay to compute the hash key. We want to do this before acquiring any
334 : * locks, in case a user-defined hash function happens to be slow.
335 : *
336 : * If scankey operator is not a cross-type comparison, we can use the
337 : * cached hash function; otherwise gotta look it up in the catalogs.
338 : *
339 : * We support the convention that sk_subtype == InvalidOid means the
340 : * opclass input type; this is a hack to simplify life for ScanKeyInit().
341 : */
5913 tgl 342 CBC 293 : if (cur->sk_subtype == rel->rd_opcintype[0] ||
5913 tgl 343 UBC 0 : cur->sk_subtype == InvalidOid)
5913 tgl 344 CBC 293 : hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
345 : else
5913 tgl 346 UBC 0 : hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
347 : cur->sk_subtype);
348 :
5319 tgl 349 CBC 293 : so->hashso_sk_hash = hashkey;
350 :
2252 rhaas 351 293 : buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
1828 teodor 352 293 : PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
2545 kgrittn 353 293 : page = BufferGetPage(buf);
2217 rhaas 354 293 : TestForOldSnapshot(scan->xs_snapshot, rel, page);
373 michael 355 292 : opaque = HashPageGetOpaque(page);
2252 rhaas 356 292 : bucket = opaque->hasho_bucket;
357 :
2321 358 292 : so->hashso_bucket_buf = buf;
359 :
360 : /*
361 : * If a bucket split is in progress, then while scanning the bucket being
362 : * populated, we need to skip tuples that were copied from bucket being
363 : * split. We also need to maintain a pin on the bucket being split to
364 : * ensure that split-cleanup work done by vacuum doesn't remove tuples
365 : * from it till this scan is done. We need to maintain a pin on the
366 : * bucket being populated to ensure that vacuum doesn't squeeze that
367 : * bucket till this scan is complete; otherwise, the ordering of tuples
368 : * can't be maintained during forward and backward scans. Here, we have
369 : * to be cautious about locking order: first, acquire the lock on bucket
370 : * being split; then, release the lock on it but not the pin; then,
371 : * acquire a lock on bucket being populated and again re-verify whether
372 : * the bucket split is still in progress. Acquiring the lock on bucket
373 : * being split first ensures that the vacuum waits for this scan to
374 : * finish.
375 : */
376 292 : if (H_BUCKET_BEING_POPULATED(opaque))
377 : {
378 : BlockNumber old_blkno;
379 : Buffer old_buf;
380 :
2321 rhaas 381 UBC 0 : old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);
382 :
383 : /*
384 : * release the lock on new bucket and re-acquire it after acquiring
385 : * the lock on old bucket.
386 : */
2298 387 0 : LockBuffer(buf, BUFFER_LOCK_UNLOCK);
388 :
2321 389 0 : old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
2217 390 0 : TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));
391 :
392 : /*
393 : * remember the split bucket buffer so as to use it later for
394 : * scanning.
395 : */
2321 396 0 : so->hashso_split_bucket_buf = old_buf;
2298 397 0 : LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
398 :
399 0 : LockBuffer(buf, BUFFER_LOCK_SHARE);
2321 400 0 : page = BufferGetPage(buf);
373 michael 401 0 : opaque = HashPageGetOpaque(page);
2321 rhaas 402 0 : Assert(opaque->hasho_bucket == bucket);
403 :
404 0 : if (H_BUCKET_BEING_POPULATED(opaque))
405 0 : so->hashso_buc_populated = true;
406 : else
407 : {
408 0 : _hash_dropbuf(rel, so->hashso_split_bucket_buf);
409 0 : so->hashso_split_bucket_buf = InvalidBuffer;
410 : }
411 : }
412 :
413 : /* If a backwards scan is requested, move to the end of the chain */
9345 bruce 414 CBC 292 : if (ScanDirectionIsBackward(dir))
415 : {
416 : /*
417 : * Backward scans that start during a split need to start from the end
418 : * of the bucket being split.
419 : */
2321 rhaas 420 42 : while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
421 3 : (so->hashso_buc_populated && !so->hashso_buc_split))
422 39 : _hash_readnext(scan, &buf, &page, &opaque);
423 : }
424 :
425 : /* remember which buffer we have pinned, if any */
2025 426 292 : Assert(BufferIsInvalid(so->currPos.buf));
427 292 : so->currPos.buf = buf;
428 :
429 : /* Now find all the tuples satisfying the qualification from a page */
430 292 : if (!_hash_readpage(scan, &buf, dir))
7629 tgl 431 72 : return false;
432 :
433 : /* OK, itemIndex says what to return */
2025 rhaas 434 220 : currItem = &so->currPos.items[so->currPos.itemIndex];
1490 andres 435 220 : scan->xs_heaptid = currItem->heapTid;
436 :
437 : /* if we're here, _hash_readpage found at least one valid tuple */
7629 tgl 438 220 : return true;
439 : }
440 :
441 : /*
442 : * _hash_readpage() -- Load data from current index page into so->currPos
443 : *
444 : * We scan all the items in the current index page and save those that
445 : * satisfy the qualification into so->currPos. If no matching items
446 : * are found in the current page, we move to the next or previous page
447 : * in a bucket chain as indicated by the direction.
448 : *
449 : * Return true if any matching items are found else return false.
450 : */
451 : static bool
2025 rhaas 452 409 : _hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
453 : {
7157 tgl 454 409 : Relation rel = scan->indexRelation;
455 409 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
456 : Buffer buf;
457 : Page page;
458 : HashPageOpaque opaque;
459 : OffsetNumber offnum;
460 : uint16 itemIndex;
461 :
9345 bruce 462 409 : buf = *bufP;
2025 rhaas 463 409 : Assert(BufferIsValid(buf));
6363 tgl 464 409 : _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
2545 kgrittn 465 409 : page = BufferGetPage(buf);
373 michael 466 409 : opaque = HashPageGetOpaque(page);
467 :
2025 rhaas 468 409 : so->currPos.buf = buf;
469 409 : so->currPos.currPage = BufferGetBlockNumber(buf);
470 :
471 409 : if (ScanDirectionIsForward(dir))
472 : {
473 367 : BlockNumber prev_blkno = InvalidBlockNumber;
474 :
475 : for (;;)
476 : {
477 : /* new page, locate starting position by binary search */
478 367 : offnum = _hash_binsearch(page, so->hashso_sk_hash);
479 :
480 367 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
481 :
482 367 : if (itemIndex != 0)
9344 bruce 483 295 : break;
484 :
485 : /*
486 : * Could not find any matching tuples on the current page; move to
487 : * the next page. Before leaving the current page, deal with any
488 : * killed items.
489 : */
2025 rhaas 490 72 : if (so->numKilled > 0)
2025 rhaas 491 UBC 0 : _hash_kill_items(scan);
492 :
493 : /*
494 : * If this is a primary bucket page, hasho_prevblkno is not a real
495 : * block number.
496 : */
2025 rhaas 497 CBC 72 : if (so->currPos.buf == so->hashso_bucket_buf ||
2025 rhaas 498 UBC 0 : so->currPos.buf == so->hashso_split_bucket_buf)
2025 rhaas 499 CBC 72 : prev_blkno = InvalidBlockNumber;
500 : else
2025 rhaas 501 UBC 0 : prev_blkno = opaque->hasho_prevblkno;
502 :
2025 rhaas 503 CBC 72 : _hash_readnext(scan, &buf, &page, &opaque);
504 72 : if (BufferIsValid(buf))
505 : {
2025 rhaas 506 UBC 0 : so->currPos.buf = buf;
507 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
508 : }
509 : else
510 : {
511 : /*
512 : * Remember next and previous block numbers for scrollable
513 : * cursors to know the start position and return false
514 : * indicating that no more matching tuples were found. Also,
515 : * don't reset currPage or lsn, because we expect
516 : * _hash_kill_items to be called for the old page after this
517 : * function returns.
518 : */
2025 rhaas 519 CBC 72 : so->currPos.prevPage = prev_blkno;
520 72 : so->currPos.nextPage = InvalidBlockNumber;
521 72 : so->currPos.buf = buf;
522 72 : return false;
523 : }
524 : }
525 :
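 : /* Items were loaded in ascending order; start returning them from the first slot. */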
526 295 : so->currPos.firstItem = 0;
527 295 : so->currPos.lastItem = itemIndex - 1;
528 295 : so->currPos.itemIndex = 0;
529 : }
530 : else
531 : {
532 42 : BlockNumber next_blkno = InvalidBlockNumber;
533 :
534 : for (;;)
535 : {
536 : /* new page, locate starting position by binary search */
537 42 : offnum = _hash_binsearch_last(page, so->hashso_sk_hash);
538 :
539 42 : itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);
540 :
541 42 : if (itemIndex != MaxIndexTuplesPerPage)
9344 bruce 542 42 : break;
543 :
544 : /*
545 : * Could not find any matching tuples on the current page; move to
546 : * the previous page. Before leaving the current page, deal with
547 : * any killed items.
548 : */
2025 rhaas 549 UBC 0 : if (so->numKilled > 0)
550 0 : _hash_kill_items(scan);
551 :
552 0 : if (so->currPos.buf == so->hashso_bucket_buf ||
553 0 : so->currPos.buf == so->hashso_split_bucket_buf)
554 0 : next_blkno = opaque->hasho_nextblkno;
555 :
556 0 : _hash_readprev(scan, &buf, &page, &opaque);
557 0 : if (BufferIsValid(buf))
558 : {
559 0 : so->currPos.buf = buf;
560 0 : so->currPos.currPage = BufferGetBlockNumber(buf);
561 : }
562 : else
563 : {
564 : /*
565 : * Remember next and previous block numbers for scrollable
566 : * cursors to know the start position and return false
567 : * indicating that no more matching tuples were found. Also,
568 : * don't reset currPage or lsn, because we expect
569 : * _hash_kill_items to be called for the old page after this
570 : * function returns.
571 : */
572 0 : so->currPos.prevPage = InvalidBlockNumber;
573 0 : so->currPos.nextPage = next_blkno;
574 0 : so->currPos.buf = buf;
575 0 : return false;
576 : }
577 : }
578 :
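 : /* Items were loaded in descending order; start returning them from the last slot. */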
2025 rhaas 579 CBC 42 : so->currPos.firstItem = itemIndex;
580 42 : so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
581 42 : so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
582 : }
583 :
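 : /* Done with this page. Keep the pin but drop the lock if it is the primary bucket page (or the bucket being split); otherwise release both the lock and the pin. */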
584 337 : if (so->currPos.buf == so->hashso_bucket_buf ||
585 117 : so->currPos.buf == so->hashso_split_bucket_buf)
586 : {
587 220 : so->currPos.prevPage = InvalidBlockNumber;
588 220 : so->currPos.nextPage = opaque->hasho_nextblkno;
589 220 : LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
590 : }
591 : else
592 : {
593 117 : so->currPos.prevPage = opaque->hasho_prevblkno;
594 117 : so->currPos.nextPage = opaque->hasho_nextblkno;
595 117 : _hash_relbuf(rel, so->currPos.buf);
596 117 : so->currPos.buf = InvalidBuffer;
597 : }
598 :
599 337 : Assert(so->currPos.firstItem <= so->currPos.lastItem);
600 337 : return true;
601 : }
602 :
603 : /*
604 : * Load all the qualified items from the current index page
605 : * into so->currPos. Helper function for _hash_readpage.
606 : */
607 : static int
608 409 : _hash_load_qualified_items(IndexScanDesc scan, Page page,
609 : OffsetNumber offnum, ScanDirection dir)
610 : {
611 409 : HashScanOpaque so = (HashScanOpaque) scan->opaque;
612 : IndexTuple itup;
613 : int itemIndex;
614 : OffsetNumber maxoff;
615 :
616 409 : maxoff = PageGetMaxOffsetNumber(page);
617 :
618 409 : if (ScanDirectionIsForward(dir))
619 : {
620 : /* load items[] in ascending order */
621 367 : itemIndex = 0;
622 :
623 34444 : while (offnum <= maxoff)
624 : {
625 34239 : Assert(offnum >= FirstOffsetNumber);
626 34239 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
627 :
628 : /*
629 : * Skip tuples that were moved by a split operation if this scan
630 : * started while the split was in progress. Also, skip tuples that
631 : * are marked as dead.
632 : */
633 34239 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
2025 rhaas 634 UBC 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
2025 rhaas 635 CBC 34239 : (scan->ignore_killed_tuples &&
636 34239 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
637 : {
2025 rhaas 638 UBC 0 : offnum = OffsetNumberNext(offnum); /* move forward */
639 0 : continue;
640 : }
641 :
2025 rhaas 642 CBC 68316 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
643 34077 : _hash_checkqual(scan, itup))
644 : {
645 : /* tuple is qualified, so remember it */
646 34077 : _hash_saveitem(so, itemIndex, offnum, itup);
647 34077 : itemIndex++;
648 : }
649 : else
650 : {
651 : /*
652 : * No more matching tuples exist on this page, so exit the
653 : * while loop.
654 : */
655 : break;
656 : }
657 :
658 34077 : offnum = OffsetNumberNext(offnum);
659 : }
660 :
661 367 : Assert(itemIndex <= MaxIndexTuplesPerPage);
662 367 : return itemIndex;
663 : }
664 : else
665 : {
666 : /* load items[] in descending order */
667 42 : itemIndex = MaxIndexTuplesPerPage;
668 :
669 16542 : while (offnum >= FirstOffsetNumber)
670 : {
671 16500 : Assert(offnum <= maxoff);
672 16500 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
673 :
674 : /*
675 : * Skip tuples that were moved by a split operation if this scan
676 : * started while the split was in progress. Also, skip tuples that
677 : * are marked as dead.
678 : */
679 16500 : if ((so->hashso_buc_populated && !so->hashso_buc_split &&
2025 rhaas 680 UBC 0 : (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
2025 rhaas 681 CBC 16500 : (scan->ignore_killed_tuples &&
682 16500 : (ItemIdIsDead(PageGetItemId(page, offnum)))))
683 : {
2025 rhaas 684 UBC 0 : offnum = OffsetNumberPrev(offnum); /* move back */
685 0 : continue;
686 : }
687 :
2025 rhaas 688 CBC 33000 : if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
689 16500 : _hash_checkqual(scan, itup))
690 : {
691 16500 : itemIndex--;
692 : /* tuple is qualified, so remember it */
693 16500 : _hash_saveitem(so, itemIndex, offnum, itup);
694 : }
695 : else
696 : {
697 : /*
698 : * No more matching tuples exist on this page, so exit the
699 : * while loop.
700 : */
701 : break;
702 : }
703 :
704 16500 : offnum = OffsetNumberPrev(offnum);
705 : }
706 :
707 42 : Assert(itemIndex >= 0);
708 42 : return itemIndex;
709 : }
710 : }
711 :
712 : /* Save an index item into so->currPos.items[itemIndex] */
713 : static inline void
714 50577 : _hash_saveitem(HashScanOpaque so, int itemIndex,
715 : OffsetNumber offnum, IndexTuple itup)
716 : {
717 50577 : HashScanPosItem *currItem = &so->currPos.items[itemIndex];
718 :
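 : /* Remember the heap TID and the index-page offset; the offset lets _hash_kill_items locate the tuple later. */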
719 50577 : currItem->heapTid = itup->t_tid;
720 50577 : currItem->indexOffset = offnum;
9770 scrappy 721 50577 : }