Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginget.c
4 : * fetch tuples from a GIN scan.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginget.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/relscan.h"
19 : #include "common/pg_prng.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/datum.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : /* GUC parameter */
27 : int GinFuzzySearchLimit = 0;
28 :
29 : typedef struct pendingPosition
30 : {
31 : Buffer pendingBuffer;
32 : OffsetNumber firstOffset;
33 : OffsetNumber lastOffset;
34 : ItemPointerData item;
35 : bool *hasMatchKey;
36 : } pendingPosition;
37 :
38 :
39 : /*
40 : * Goes to the next page if current offset is outside of bounds
41 : */
42 : static bool
1836 teodor 43 CBC 203804 : moveRightIfItNeeded(GinBtreeData *btree, GinBtreeStack *stack, Snapshot snapshot)
44 : {
2545 kgrittn 45 203804 : Page page = BufferGetPage(stack->buffer);
46 :
5050 bruce 47 203804 : if (stack->off > PageGetMaxOffsetNumber(page))
48 : {
49 : /*
50 : * We scanned the whole page, so we should take right page
51 : */
52 1895 : if (GinPageRightMost(page))
53 183 : return false; /* no more pages */
54 :
3439 heikki.linnakangas 55 1712 : stack->buffer = ginStepRight(stack->buffer, btree->index, GIN_SHARE);
56 1712 : stack->blkno = BufferGetBlockNumber(stack->buffer);
5441 tgl 57 1712 : stack->off = FirstOffsetNumber;
1801 teodor 58 1712 : PredicateLockPage(btree->index, stack->blkno, snapshot);
59 : }
60 :
5441 tgl 61 203621 : return true;
62 : }
63 :
64 : /*
65 : * Scan all pages of a posting tree and save all its heap ItemPointers
66 : * in scanEntry->matchBitmap
67 : */
68 : static void
4475 69 6 : scanPostingTree(Relation index, GinScanEntry scanEntry,
70 : BlockNumber rootPostingTree, Snapshot snapshot)
71 : {
72 : GinBtreeData btree;
73 : GinBtreeStack *stack;
74 : Buffer buffer;
75 : Page page;
76 :
77 : /* Descend to the leftmost leaf page */
2557 kgrittn 78 6 : stack = ginScanBeginPostingTree(&btree, index, rootPostingTree, snapshot);
3427 heikki.linnakangas 79 6 : buffer = stack->buffer;
80 :
5441 tgl 81 6 : IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
82 :
3427 heikki.linnakangas 83 6 : freeGinBtreeStack(stack);
84 :
85 : /*
86 : * Loop iterates through all leaf pages of posting tree
87 : */
88 : for (;;)
89 : {
2545 kgrittn 90 15 : page = BufferGetPage(buffer);
3364 heikki.linnakangas 91 15 : if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0)
92 : {
3260 bruce 93 15 : int n = GinDataLeafPageGetItemsToTbm(page, scanEntry->matchBitmap);
94 :
3364 heikki.linnakangas 95 15 : scanEntry->predictNumberResult += n;
96 : }
97 :
5050 bruce 98 15 : if (GinPageRightMost(page))
4475 tgl 99 6 : break; /* no more pages */
100 :
3439 heikki.linnakangas 101 9 : buffer = ginStepRight(buffer, index, GIN_SHARE);
102 : }
103 :
4475 tgl 104 6 : UnlockReleaseBuffer(buffer);
5441 105 6 : }
106 :
107 : /*
108 : * Collects TIDs into scanEntry->matchBitmap for all heap tuples that
109 : * match the search entry. This supports three different match modes:
110 : *
111 : * 1. Partial-match support: scan from current point until the
112 : * comparePartialFn says we're done.
113 : * 2. SEARCH_MODE_ALL: scan from current point (which should be first
114 : * key for the current attnum) until we hit null items or end of attnum
115 : * 3. SEARCH_MODE_EVERYTHING: scan from current point (which should be first
116 : * key for the current attnum) until we hit end of attnum
117 : *
118 : * Returns true if done, false if it's necessary to restart scan from scratch
119 : */
120 : static bool
4475 121 267 : collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack,
122 : GinScanEntry scanEntry, Snapshot snapshot)
123 : {
124 : OffsetNumber attnum;
125 : Form_pg_attribute attr;
126 :
127 : /* Initialize empty bitmap result */
2223 rhaas 128 267 : scanEntry->matchBitmap = tbm_create(work_mem * 1024L, NULL);
129 :
130 : /* Null query cannot partial-match anything */
4475 tgl 131 267 : if (scanEntry->isPartialMatch &&
132 126 : scanEntry->queryCategory != GIN_CAT_NORM_KEY)
4475 tgl 133 UBC 0 : return true;
134 :
135 : /* Locate tupdesc entry for key column (for attbyval/attlen data) */
4475 tgl 136 CBC 267 : attnum = scanEntry->attnum;
2058 andres 137 267 : attr = TupleDescAttr(btree->ginstate->origTupdesc, attnum - 1);
138 :
139 : /*
140 : * Predicate lock entry leaf page, following pages will be locked by
141 : * moveRightIfItNeeded()
142 : */
1801 teodor 143 267 : PredicateLockPage(btree->index, stack->buffer, snapshot);
144 :
145 : for (;;)
5441 tgl 146 203531 : {
147 : Page page;
148 : IndexTuple itup;
149 : Datum idatum;
150 : GinNullCategory icategory;
151 :
152 : /*
153 : * stack->off points to the interested entry, buffer is already locked
154 : */
1836 teodor 155 203798 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
5441 tgl 156 267 : return true;
157 :
2545 kgrittn 158 203615 : page = BufferGetPage(stack->buffer);
159 203615 : TestForOldSnapshot(snapshot, btree->index, page);
5441 tgl 160 203615 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
161 :
162 : /*
163 : * If tuple stores another attribute then stop scan
164 : */
4475 165 203615 : if (gintuple_get_attrnum(btree->ginstate, itup) != attnum)
5385 tgl 166 UBC 0 : return true;
167 :
168 : /* Safe to fetch attribute value */
4475 tgl 169 CBC 203615 : idatum = gintuple_get_key(btree->ginstate, itup, &icategory);
170 :
171 : /*
172 : * Check for appropriate scan stop conditions
173 : */
174 203615 : if (scanEntry->isPartialMatch)
175 : {
176 : int32 cmp;
177 :
178 : /*
179 : * In partial match, stop scan at any null (including
180 : * placeholders); partial matches never match nulls
181 : */
182 662 : if (icategory != GIN_CAT_NORM_KEY)
183 5 : return true;
184 :
185 : /*----------
186 : * Check of partial match.
187 : * case cmp == 0 => match
188 : * case cmp > 0 => not match and finish scan
189 : * case cmp < 0 => not match and continue scan
190 : *----------
191 : */
4380 192 657 : cmp = DatumGetInt32(FunctionCall4Coll(&btree->ginstate->comparePartialFn[attnum - 1],
2118 193 657 : btree->ginstate->supportCollation[attnum - 1],
194 : scanEntry->queryKey,
195 : idatum,
196 657 : UInt16GetDatum(scanEntry->strategy),
197 657 : PointerGetDatum(scanEntry->extra_data)));
198 :
4475 199 657 : if (cmp > 0)
200 65 : return true;
201 592 : else if (cmp < 0)
202 : {
203 30 : stack->off++;
204 30 : continue;
205 : }
206 : }
207 202953 : else if (scanEntry->searchMode == GIN_SEARCH_MODE_ALL)
208 : {
209 : /*
210 : * In ALL mode, we are not interested in null items, so we can
211 : * stop if we get to a null-item placeholder (which will be the
212 : * last entry for a given attnum). We do want to include NULL_KEY
213 : * and EMPTY_ITEM entries, though.
214 : */
215 202953 : if (icategory == GIN_CAT_NULL_ITEM)
216 14 : return true;
217 : }
218 :
219 : /*
220 : * OK, we want to return the TIDs listed in this entry.
221 : */
5050 bruce 222 203501 : if (GinIsPostingTree(itup))
223 : {
5441 tgl 224 6 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
225 :
226 : /*
227 : * We should unlock current page (but not unpin) during tree scan
228 : * to prevent deadlock with vacuum processes.
229 : *
230 : * We save current entry value (idatum) to be able to re-find our
231 : * tuple after re-locking
232 : */
4475 233 6 : if (icategory == GIN_CAT_NORM_KEY)
234 6 : idatum = datumCopy(idatum, attr->attbyval, attr->attlen);
235 :
5441 236 6 : LockBuffer(stack->buffer, GIN_UNLOCK);
237 :
238 : /*
239 : * Acquire predicate lock on the posting tree. We already hold a
240 : * lock on the entry page, but insertions to the posting tree
241 : * don't check for conflicts on that level.
242 : */
1801 teodor 243 6 : PredicateLockPage(btree->index, rootPostingTree, snapshot);
244 :
245 : /* Collect all the TIDs in this entry's posting tree */
2545 kgrittn 246 6 : scanPostingTree(btree->index, scanEntry, rootPostingTree,
247 : snapshot);
248 :
249 : /*
250 : * We lock again the entry page and while it was unlocked insert
251 : * might have occurred, so we need to re-find our position.
252 : */
5441 tgl 253 6 : LockBuffer(stack->buffer, GIN_SHARE);
2545 kgrittn 254 6 : page = BufferGetPage(stack->buffer);
5050 bruce 255 6 : if (!GinPageIsLeaf(page))
256 : {
257 : /*
258 : * Root page becomes non-leaf while we unlock it. We will
259 : * start again, this situation doesn't occur often - root can
260 : * became a non-leaf only once per life of index.
261 : */
5441 tgl 262 UBC 0 : return false;
263 : }
264 :
265 : /* Search forward to re-find idatum */
266 : for (;;)
267 : {
1836 teodor 268 CBC 6 : if (moveRightIfItNeeded(btree, stack, snapshot) == false)
955 tgl 269 UBC 0 : ereport(ERROR,
270 : (errcode(ERRCODE_INTERNAL_ERROR),
271 : errmsg("failed to re-find tuple within index \"%s\"",
272 : RelationGetRelationName(btree->index))));
273 :
2545 kgrittn 274 CBC 6 : page = BufferGetPage(stack->buffer);
5441 tgl 275 6 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
276 :
955 277 6 : if (gintuple_get_attrnum(btree->ginstate, itup) == attnum)
278 : {
279 : Datum newDatum;
280 : GinNullCategory newCategory;
281 :
282 6 : newDatum = gintuple_get_key(btree->ginstate, itup,
283 : &newCategory);
284 :
285 6 : if (ginCompareEntries(btree->ginstate, attnum,
286 : newDatum, newCategory,
287 : idatum, icategory) == 0)
288 6 : break; /* Found! */
289 : }
290 :
5441 tgl 291 UBC 0 : stack->off++;
292 : }
293 :
4475 tgl 294 CBC 6 : if (icategory == GIN_CAT_NORM_KEY && !attr->attbyval)
4475 tgl 295 UBC 0 : pfree(DatumGetPointer(idatum));
296 : }
297 : else
298 : {
299 : ItemPointer ipd;
300 : int nipd;
301 :
3364 heikki.linnakangas 302 CBC 203495 : ipd = ginReadTuple(btree->ginstate, scanEntry->attnum, itup, &nipd);
303 203495 : tbm_add_tuples(scanEntry->matchBitmap, ipd, nipd, false);
5050 bruce 304 203495 : scanEntry->predictNumberResult += GinGetNPosting(itup);
2550 tgl 305 203495 : pfree(ipd);
306 : }
307 :
308 : /*
309 : * Done with this entry, go to the next
310 : */
5441 311 203501 : stack->off++;
312 : }
313 : }
314 :
315 : /*
316 : * Start* functions setup beginning state of searches: finds correct buffer and pins it.
317 : */
318 : static void
2557 kgrittn 319 1846 : startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
320 : {
321 : GinBtreeData btreeEntry;
322 : GinBtreeStack *stackEntry;
323 : Page page;
324 : bool needUnlock;
325 :
4475 tgl 326 1846 : restartScanEntry:
5129 327 1846 : entry->buffer = InvalidBuffer;
4474 328 1846 : ItemPointerSetMin(&entry->curItem);
5129 329 1846 : entry->offset = InvalidOffsetNumber;
2583 330 1846 : if (entry->list)
2583 tgl 331 UBC 0 : pfree(entry->list);
5129 tgl 332 CBC 1846 : entry->list = NULL;
333 1846 : entry->nlist = 0;
4475 334 1846 : entry->matchBitmap = NULL;
335 1846 : entry->matchResult = NULL;
2062 peter_e 336 1846 : entry->reduceResult = false;
5129 tgl 337 1846 : entry->predictNumberResult = 0;
338 :
339 : /*
340 : * we should find entry, and begin scan of posting tree or just store
341 : * posting list in memory
342 : */
4475 343 1846 : ginPrepareEntryScan(&btreeEntry, entry->attnum,
344 1846 : entry->queryKey, entry->queryCategory,
345 : ginstate);
1564 akorotkov 346 1846 : stackEntry = ginFindLeafPage(&btreeEntry, true, false, snapshot);
2545 kgrittn 347 1846 : page = BufferGetPage(stackEntry->buffer);
348 :
349 : /* ginFindLeafPage() will have already checked snapshot age. */
2062 peter_e 350 1846 : needUnlock = true;
351 :
352 1846 : entry->isFinished = true;
353 :
4475 tgl 354 1846 : if (entry->isPartialMatch ||
355 1720 : entry->queryCategory == GIN_CAT_EMPTY_QUERY)
356 : {
357 : /*
358 : * btreeEntry.findItem locates the first item >= given search key.
359 : * (For GIN_CAT_EMPTY_QUERY, it will find the leftmost index item
360 : * because of the way the GIN_CAT_EMPTY_QUERY category code is
361 : * assigned.) We scan forward from there and collect all TIDs needed
362 : * for the entry type.
363 : */
5441 364 267 : btreeEntry.findItem(&btreeEntry, stackEntry);
2545 kgrittn 365 267 : if (collectMatchBitmap(&btreeEntry, stackEntry, entry, snapshot)
366 267 : == false)
367 : {
368 : /*
369 : * GIN tree was seriously restructured, so we will cleanup all
370 : * found data and rescan. See comments near 'return false' in
371 : * collectMatchBitmap()
372 : */
4475 tgl 373 UBC 0 : if (entry->matchBitmap)
374 : {
375 0 : if (entry->matchIterator)
376 0 : tbm_end_iterate(entry->matchIterator);
377 0 : entry->matchIterator = NULL;
378 0 : tbm_free(entry->matchBitmap);
379 0 : entry->matchBitmap = NULL;
380 : }
5441 381 0 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
382 0 : freeGinBtreeStack(stackEntry);
4475 383 0 : goto restartScanEntry;
384 : }
385 :
4475 tgl 386 CBC 267 : if (entry->matchBitmap && !tbm_is_empty(entry->matchBitmap))
387 : {
388 210 : entry->matchIterator = tbm_begin_iterate(entry->matchBitmap);
2062 peter_e 389 210 : entry->isFinished = false;
390 : }
391 : }
5441 tgl 392 1579 : else if (btreeEntry.findItem(&btreeEntry, stackEntry))
393 : {
5465 teodor 394 1089 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
395 :
396 1089 : if (GinIsPostingTree(itup))
397 : {
398 32 : BlockNumber rootPostingTree = GinGetPostingTree(itup);
399 : GinBtreeStack *stack;
400 : Page entrypage;
401 : ItemPointerData minItem;
402 :
403 : /*
404 : * This is an equality scan, so lock the root of the posting tree.
405 : * It represents a lock on the exact key value, and covers all the
406 : * items in the posting tree.
407 : */
1801 408 32 : PredicateLockPage(ginstate->index, rootPostingTree, snapshot);
409 :
410 : /*
411 : * We should unlock entry page before touching posting tree to
412 : * prevent deadlocks with vacuum processes. Because entry is never
413 : * deleted from page and posting tree is never reduced to the
414 : * posting list, we can unlock page after getting BlockNumber of
415 : * root of posting tree.
416 : */
6186 417 32 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
2062 peter_e 418 32 : needUnlock = false;
419 :
3357 heikki.linnakangas 420 32 : stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
421 : rootPostingTree, snapshot);
3427 422 32 : entry->buffer = stack->buffer;
423 :
424 : /*
425 : * We keep buffer pinned because we need to prevent deletion of
426 : * page during scan. See GIN's vacuum implementation. RefCount is
427 : * increased to keep buffer pinned after freeGinBtreeStack() call.
428 : */
5465 teodor 429 32 : IncrBufferRefCount(entry->buffer);
430 :
186 drowley 431 GNC 32 : entrypage = BufferGetPage(entry->buffer);
432 :
433 : /*
434 : * Load the first page into memory.
435 : */
3357 heikki.linnakangas 436 CBC 32 : ItemPointerSetMin(&minItem);
186 drowley 437 GNC 32 : entry->list = GinDataLeafPageGetItems(entrypage, &entry->nlist, minItem);
438 :
3364 heikki.linnakangas 439 CBC 32 : entry->predictNumberResult = stack->predictNumber * entry->nlist;
440 :
6031 bruce 441 32 : LockBuffer(entry->buffer, GIN_UNLOCK);
3427 heikki.linnakangas 442 32 : freeGinBtreeStack(stack);
2062 peter_e 443 32 : entry->isFinished = false;
444 : }
445 : else
446 : {
447 : /*
448 : * Lock the entry leaf page. This is more coarse-grained than
449 : * necessary, because it will conflict with any insertions that
450 : * land on the same leaf page, not only the exact key we searched
451 : * for. But locking an individual tuple would require updating
452 : * that lock whenever it moves because of insertions or vacuums,
453 : * which seems too complicated.
454 : */
1801 teodor 455 1057 : PredicateLockPage(ginstate->index,
456 : BufferGetBlockNumber(stackEntry->buffer),
457 : snapshot);
458 1057 : if (GinGetNPosting(itup) > 0)
459 : {
460 1054 : entry->list = ginReadTuple(ginstate, entry->attnum, itup,
461 : &entry->nlist);
462 1054 : entry->predictNumberResult = entry->nlist;
463 :
464 1054 : entry->isFinished = false;
465 : }
466 : }
467 : }
468 : else
469 : {
470 : /*
471 : * No entry found. Predicate lock the leaf page, to lock the place
472 : * where the entry would've been, had there been one.
473 : */
474 490 : PredicateLockPage(ginstate->index,
475 : BufferGetBlockNumber(stackEntry->buffer), snapshot);
476 : }
477 :
5465 478 1846 : if (needUnlock)
5050 bruce 479 1814 : LockBuffer(stackEntry->buffer, GIN_UNLOCK);
5465 teodor 480 1846 : freeGinBtreeStack(stackEntry);
6186 481 1846 : }
482 :
483 : /*
484 : * Comparison function for scan entry indexes. Sorts by predictNumberResult,
485 : * least frequent items first.
486 : */
487 : static int
3348 heikki.linnakangas 488 1737 : entryIndexByFrequencyCmp(const void *a1, const void *a2, void *arg)
489 : {
490 1737 : const GinScanKey key = (const GinScanKey) arg;
491 1737 : int i1 = *(const int *) a1;
492 1737 : int i2 = *(const int *) a2;
493 1737 : uint32 n1 = key->scanEntry[i1]->predictNumberResult;
494 1737 : uint32 n2 = key->scanEntry[i2]->predictNumberResult;
495 :
496 1737 : if (n1 < n2)
497 317 : return -1;
498 1420 : else if (n1 == n2)
499 558 : return 0;
500 : else
501 862 : return 1;
502 : }
503 :
504 : static void
505 852 : startScanKey(GinState *ginstate, GinScanOpaque so, GinScanKey key)
506 : {
507 852 : MemoryContext oldCtx = CurrentMemoryContext;
508 : int i;
509 : int j;
510 : int *entryIndexes;
511 :
4474 tgl 512 852 : ItemPointerSetMin(&key->curItem);
513 852 : key->curItemMatches = false;
514 852 : key->recheckCurItem = false;
515 852 : key->isFinished = false;
516 :
517 : /*
518 : * Divide the entries into two distinct sets: required and additional.
519 : * Additional entries are not enough for a match alone, without any items
520 : * from the required set, but are needed by the consistent function to
521 : * decide if an item matches. When scanning, we can skip over items from
522 : * additional entries that have no corresponding matches in any of the
523 : * required entries. That speeds up queries like "frequent & rare"
524 : * considerably, if the frequent term can be put in the additional set.
525 : *
526 : * There can be many legal ways to divide them entries into these two
527 : * sets. A conservative division is to just put everything in the required
528 : * set, but the more you can put in the additional set, the more you can
529 : * skip during the scan. To maximize skipping, we try to put as many
530 : * frequent items as possible into additional, and less frequent ones into
531 : * required. To do that, sort the entries by frequency
532 : * (predictNumberResult), and put entries into the required set in that
533 : * order, until the consistent function says that none of the remaining
534 : * entries can form a match, without any items from the required set. The
535 : * rest go to the additional set.
536 : *
537 : * Exclude-only scan keys are known to have no required entries.
538 : */
1177 akorotkov 539 852 : if (key->excludeOnly)
540 : {
541 19 : MemoryContextSwitchTo(so->keyCtx);
542 :
543 19 : key->nrequired = 0;
544 19 : key->nadditional = key->nentries;
545 19 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
546 22 : for (i = 0; i < key->nadditional; i++)
547 3 : key->additionalEntries[i] = key->scanEntry[i];
548 : }
549 833 : else if (key->nentries > 1)
550 : {
3348 heikki.linnakangas 551 275 : MemoryContextSwitchTo(so->tempCtx);
552 :
553 275 : entryIndexes = (int *) palloc(sizeof(int) * key->nentries);
554 1560 : for (i = 0; i < key->nentries; i++)
555 1285 : entryIndexes[i] = i;
556 275 : qsort_arg(entryIndexes, key->nentries, sizeof(int),
557 : entryIndexByFrequencyCmp, key);
558 :
559 917 : for (i = 0; i < key->nentries - 1; i++)
560 : {
561 : /* Pass all entries <= i as FALSE, and the rest as MAYBE */
562 34779 : for (j = 0; j <= i; j++)
563 33928 : key->entryRes[entryIndexes[j]] = GIN_FALSE;
564 35183 : for (j = i + 1; j < key->nentries; j++)
565 34332 : key->entryRes[entryIndexes[j]] = GIN_MAYBE;
566 :
567 851 : if (key->triConsistentFn(key) == GIN_FALSE)
568 209 : break;
569 : }
570 : /* i is now the last required entry. */
571 :
2986 572 275 : MemoryContextSwitchTo(so->keyCtx);
573 :
3348 574 275 : key->nrequired = i + 1;
575 275 : key->nadditional = key->nentries - key->nrequired;
576 275 : key->requiredEntries = palloc(key->nrequired * sizeof(GinScanEntry));
577 275 : key->additionalEntries = palloc(key->nadditional * sizeof(GinScanEntry));
578 :
579 275 : j = 0;
580 1192 : for (i = 0; i < key->nrequired; i++)
581 917 : key->requiredEntries[i] = key->scanEntry[entryIndexes[j++]];
582 643 : for (i = 0; i < key->nadditional; i++)
583 368 : key->additionalEntries[i] = key->scanEntry[entryIndexes[j++]];
584 :
585 : /* clean up after consistentFn calls (also frees entryIndexes) */
586 275 : MemoryContextReset(so->tempCtx);
587 : }
588 : else
589 : {
2986 590 558 : MemoryContextSwitchTo(so->keyCtx);
591 :
3348 592 558 : key->nrequired = 1;
593 558 : key->nadditional = 0;
594 558 : key->requiredEntries = palloc(1 * sizeof(GinScanEntry));
595 558 : key->requiredEntries[0] = key->scanEntry[0];
596 : }
2986 597 852 : MemoryContextSwitchTo(oldCtx);
4474 tgl 598 852 : }
599 :
600 : static void
601 788 : startScan(IndexScanDesc scan)
602 : {
603 788 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
604 788 : GinState *ginstate = &so->ginstate;
605 : uint32 i;
606 :
607 2634 : for (i = 0; i < so->totalentries; i++)
2557 kgrittn 608 1846 : startScanEntry(ginstate, so->entries[i], scan->xs_snapshot);
609 :
5465 teodor 610 788 : if (GinFuzzySearchLimit > 0)
611 : {
612 : /*
613 : * If all of keys more than threshold we will try to reduce result, we
614 : * hope (and only hope, for intersection operation of array our
615 : * supposition isn't true), that total result will not more than
616 : * minimal predictNumberResult.
617 : */
2992 heikki.linnakangas 618 3 : bool reduce = true;
619 :
4474 tgl 620 6 : for (i = 0; i < so->totalentries; i++)
621 : {
622 3 : if (so->entries[i]->predictNumberResult <= so->totalentries * GinFuzzySearchLimit)
623 : {
2992 heikki.linnakangas 624 UBC 0 : reduce = false;
625 0 : break;
626 : }
627 : }
2992 heikki.linnakangas 628 CBC 3 : if (reduce)
629 : {
630 6 : for (i = 0; i < so->totalentries; i++)
631 : {
4474 tgl 632 3 : so->entries[i]->predictNumberResult /= so->totalentries;
2062 peter_e 633 3 : so->entries[i]->reduceResult = true;
634 : }
635 : }
636 : }
637 :
638 : /*
639 : * Now that we have the estimates for the entry frequencies, finish
640 : * initializing the scan keys.
641 : */
6031 bruce 642 1640 : for (i = 0; i < so->nkeys; i++)
3348 heikki.linnakangas 643 852 : startScanKey(ginstate, so, so->keys + i);
6186 teodor 644 788 : }
645 :
646 : /*
647 : * Load the next batch of item pointers from a posting tree.
648 : *
649 : * Note that we copy the page into GinScanEntry->list array and unlock it, but
650 : * keep it pinned to prevent interference with vacuum.
651 : */
652 : static void
2557 kgrittn 653 50 : entryLoadMoreItems(GinState *ginstate, GinScanEntry entry,
654 : ItemPointerData advancePast, Snapshot snapshot)
655 : {
656 : Page page;
657 : int i;
658 : bool stepright;
659 :
3357 heikki.linnakangas 660 50 : if (!BufferIsValid(entry->buffer))
661 : {
3357 heikki.linnakangas 662 UBC 0 : entry->isFinished = true;
663 0 : return;
664 : }
665 :
666 : /*
667 : * We have two strategies for finding the correct page: step right from
668 : * the current page, or descend the tree again from the root. If
669 : * advancePast equals the current item, the next matching item should be
670 : * on the next page, so we step right. Otherwise, descend from root.
671 : */
3357 heikki.linnakangas 672 CBC 50 : if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
673 : {
674 47 : stepright = true;
675 47 : LockBuffer(entry->buffer, GIN_SHARE);
676 : }
677 : else
678 : {
679 : GinBtreeStack *stack;
680 :
681 3 : ReleaseBuffer(entry->buffer);
682 :
683 : /*
684 : * Set the search key, and find the correct leaf page.
685 : */
686 3 : if (ItemPointerIsLossyPage(&advancePast))
687 : {
3357 heikki.linnakangas 688 UBC 0 : ItemPointerSet(&entry->btree.itemptr,
689 0 : GinItemPointerGetBlockNumber(&advancePast) + 1,
690 : FirstOffsetNumber);
691 : }
692 : else
693 : {
2203 alvherre 694 CBC 3 : ItemPointerSet(&entry->btree.itemptr,
695 : GinItemPointerGetBlockNumber(&advancePast),
2118 tgl 696 3 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
697 : }
3357 heikki.linnakangas 698 3 : entry->btree.fullScan = false;
1564 akorotkov 699 3 : stack = ginFindLeafPage(&entry->btree, true, false, snapshot);
700 :
701 : /* we don't need the stack, just the buffer. */
3357 heikki.linnakangas 702 3 : entry->buffer = stack->buffer;
703 3 : IncrBufferRefCount(entry->buffer);
704 3 : freeGinBtreeStack(stack);
705 3 : stepright = false;
706 : }
707 :
708 50 : elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
709 : GinItemPointerGetBlockNumber(&advancePast),
710 : GinItemPointerGetOffsetNumber(&advancePast),
711 : !stepright);
712 :
2545 kgrittn 713 50 : page = BufferGetPage(entry->buffer);
714 : for (;;)
715 : {
3357 heikki.linnakangas 716 53 : entry->offset = InvalidOffsetNumber;
717 53 : if (entry->list)
718 : {
719 47 : pfree(entry->list);
720 47 : entry->list = NULL;
721 47 : entry->nlist = 0;
722 : }
723 :
724 53 : if (stepright)
725 : {
726 : /*
727 : * We've processed all the entries on this page. If it was the
728 : * last page in the tree, we're done.
729 : */
730 50 : if (GinPageRightMost(page))
731 : {
732 3 : UnlockReleaseBuffer(entry->buffer);
733 3 : entry->buffer = InvalidBuffer;
2062 peter_e 734 3 : entry->isFinished = true;
3357 heikki.linnakangas 735 3 : return;
736 : }
737 :
738 : /*
739 : * Step to next page, following the right link. then find the
740 : * first ItemPointer greater than advancePast.
741 : */
742 47 : entry->buffer = ginStepRight(entry->buffer,
743 : ginstate->index,
744 : GIN_SHARE);
2545 kgrittn 745 47 : page = BufferGetPage(entry->buffer);
746 : }
3357 heikki.linnakangas 747 50 : stepright = true;
748 :
749 50 : if (GinPageGetOpaque(page)->flags & GIN_DELETED)
3260 bruce 750 UBC 0 : continue; /* page was deleted by concurrent vacuum */
751 :
752 : /*
753 : * The first item > advancePast might not be on this page, but
754 : * somewhere to the right, if the page was split, or a non-match from
755 : * another key in the query allowed us to skip some items from this
756 : * entry. Keep following the right-links until we re-find the correct
757 : * page.
758 : */
3357 heikki.linnakangas 759 CBC 68 : if (!GinPageRightMost(page) &&
760 18 : ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
761 : {
762 : /*
763 : * the item we're looking is > the right bound of the page, so it
764 : * can't be on this page.
765 : */
3357 heikki.linnakangas 766 UBC 0 : continue;
767 : }
768 :
3357 heikki.linnakangas 769 CBC 50 : entry->list = GinDataLeafPageGetItems(page, &entry->nlist, advancePast);
770 :
771 962 : for (i = 0; i < entry->nlist; i++)
772 : {
773 959 : if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
774 : {
775 47 : entry->offset = i;
776 :
777 47 : if (GinPageRightMost(page))
778 : {
779 : /* after processing the copied items, we're done. */
780 29 : UnlockReleaseBuffer(entry->buffer);
781 29 : entry->buffer = InvalidBuffer;
782 : }
783 : else
784 18 : LockBuffer(entry->buffer, GIN_UNLOCK);
5465 teodor 785 47 : return;
786 : }
787 : }
788 : }
789 : }
790 :
791 : #define gin_rand() pg_prng_double(&pg_global_prng_state)
792 : #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
793 :
794 : /*
795 : * Sets entry->curItem to next heap item pointer > advancePast, for one entry
796 : * of one scan key, or sets entry->isFinished to true if there are no more.
797 : *
798 : * Item pointers are returned in ascending order.
799 : *
800 : * Note: this can return a "lossy page" item pointer, indicating that the
801 : * entry potentially matches all items on that heap page. However, it is
802 : * not allowed to return both a lossy page pointer and exact (regular)
803 : * item pointers for the same page. (Doing so would break the key-combination
804 : * logic in keyGetItem and scanGetItem; see comment in scanGetItem.) In the
805 : * current implementation this is guaranteed by the behavior of tidbitmaps.
806 : */
807 : static void
3357 heikki.linnakangas 808 541698 : entryGetItem(GinState *ginstate, GinScanEntry entry,
809 : ItemPointerData advancePast, Snapshot snapshot)
810 : {
4635 tgl 811 541698 : Assert(!entry->isFinished);
812 :
3357 heikki.linnakangas 813 541698 : Assert(!ItemPointerIsValid(&entry->curItem) ||
814 : ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
815 :
4474 tgl 816 541698 : if (entry->matchBitmap)
817 : {
818 : /* A bitmap result */
3357 heikki.linnakangas 819 134020 : BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
820 134020 : OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
821 :
822 : for (;;)
823 : {
824 : /*
825 : * If we've exhausted all items on this block, move to next block
826 : * in the bitmap.
827 : */
3286 828 134020 : while (entry->matchResult == NULL ||
829 136223 : (entry->matchResult->ntuples >= 0 &&
830 136223 : entry->offset >= entry->matchResult->ntuples) ||
3256 831 404053 : entry->matchResult->blockno < advancePastBlk ||
832 133810 : (ItemPointerIsLossyPage(&advancePast) &&
3256 heikki.linnakangas 833 UBC 0 : entry->matchResult->blockno == advancePastBlk))
834 : {
4475 tgl 835 CBC 2623 : entry->matchResult = tbm_iterate(entry->matchIterator);
836 :
837 2623 : if (entry->matchResult == NULL)
838 : {
4635 839 210 : ItemPointerSetInvalid(&entry->curItem);
4475 840 210 : tbm_end_iterate(entry->matchIterator);
841 210 : entry->matchIterator = NULL;
2062 peter_e 842 210 : entry->isFinished = true;
5441 tgl 843 210 : break;
844 : }
845 :
846 : /*
847 : * Reset counter to the beginning of entry->matchResult. Note:
848 : * entry->offset is still greater than matchResult->ntuples if
849 : * matchResult is lossy. So, on next call we will get next
850 : * result from TIDBitmap.
851 : */
852 2413 : entry->offset = 0;
853 : }
3286 heikki.linnakangas 854 134020 : if (entry->isFinished)
855 210 : break;
856 :
857 : /*
858 : * We're now on the first page after advancePast which has any
859 : * items on it. If it's a lossy result, return that.
860 : */
4475 tgl 861 133810 : if (entry->matchResult->ntuples < 0)
862 : {
5129 tgl 863 UBC 0 : ItemPointerSetLossyPage(&entry->curItem,
864 : entry->matchResult->blockno);
865 :
866 : /*
867 : * We might as well fall out of the loop; we could not
868 : * estimate number of results on this page to support correct
869 : * reducing of result even if it's enabled.
870 : */
871 0 : break;
872 : }
873 :
874 : /*
875 : * Not a lossy page. Skip over any offsets <= advancePast, and
876 : * return that.
877 : */
3357 heikki.linnakangas 878 CBC 133810 : if (entry->matchResult->blockno == advancePastBlk)
879 : {
880 : /*
881 : * First, do a quick check against the last offset on the
882 : * page. If that's > advancePast, so are all the other
883 : * offsets, so just go back to the top to get the next page.
884 : */
3286 885 131607 : if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
886 : {
3286 heikki.linnakangas 887 UBC 0 : entry->offset = entry->matchResult->ntuples;
888 0 : continue;
889 : }
890 :
891 : /* Otherwise scan to find the first item > advancePast */
3357 heikki.linnakangas 892 CBC 131607 : while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
3357 heikki.linnakangas 893 UBC 0 : entry->offset++;
894 : }
895 :
5129 tgl 896 CBC 133810 : ItemPointerSet(&entry->curItem,
4475 897 133810 : entry->matchResult->blockno,
898 133810 : entry->matchResult->offsets[entry->offset]);
5129 899 133810 : entry->offset++;
900 :
901 : /* Done unless we need to reduce the result */
1101 902 133810 : if (!entry->reduceResult || !dropItem(entry))
903 : break;
904 : }
905 : }
5465 teodor 906 407678 : else if (!BufferIsValid(entry->buffer))
907 : {
908 : /*
909 : * A posting list from an entry tuple, or the last page of a posting
910 : * tree.
911 : */
912 : for (;;)
913 : {
3357 heikki.linnakangas 914 194714 : if (entry->offset >= entry->nlist)
915 : {
916 810 : ItemPointerSetInvalid(&entry->curItem);
2062 peter_e 917 810 : entry->isFinished = true;
3357 heikki.linnakangas 918 810 : break;
919 : }
920 :
921 193904 : entry->curItem = entry->list[entry->offset++];
922 :
923 : /* If we're not past advancePast, keep scanning */
1101 tgl 924 193904 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
925 33317 : continue;
926 :
927 : /* Done unless we need to reduce the result */
928 160587 : if (!entry->reduceResult || !dropItem(entry))
929 : break;
930 : }
931 : }
932 : else
933 : {
934 : /* A posting tree */
935 : for (;;)
936 : {
937 : /* If we've processed the current batch, load more items */
3357 heikki.linnakangas 938 339559 : while (entry->offset >= entry->nlist)
939 : {
2557 kgrittn 940 50 : entryLoadMoreItems(ginstate, entry, advancePast, snapshot);
941 :
3357 heikki.linnakangas 942 50 : if (entry->isFinished)
943 : {
944 3 : ItemPointerSetInvalid(&entry->curItem);
945 3 : return;
946 : }
947 : }
948 :
949 339509 : entry->curItem = entry->list[entry->offset++];
950 :
951 : /* If we're not past advancePast, keep scanning */
1101 tgl 952 339509 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
953 23553 : continue;
954 :
955 : /* Done unless we need to reduce the result */
956 315956 : if (!entry->reduceResult || !dropItem(entry))
957 : break;
958 :
959 : /*
960 : * Advance advancePast (so that entryLoadMoreItems will load the
961 : * right data), and keep scanning
962 : */
963 59484 : advancePast = entry->curItem;
964 : }
965 : }
966 : }
967 :
968 : /*
969 : * Identify the "current" item among the input entry streams for this scan key
970 : * that is greater than advancePast, and test whether it passes the scan key
971 : * qual condition.
972 : *
973 : * The current item is the smallest curItem among the inputs. key->curItem
974 : * is set to that value. key->curItemMatches is set to indicate whether that
975 : * TID passes the consistentFn test. If so, key->recheckCurItem is set true
976 : * iff recheck is needed for this item pointer (including the case where the
977 : * item pointer is a lossy page pointer).
978 : *
979 : * If all entry streams are exhausted, sets key->isFinished to true.
980 : *
981 : * Item pointers must be returned in ascending order.
982 : *
983 : * Note: this can return a "lossy page" item pointer, indicating that the
984 : * key potentially matches all items on that heap page. However, it is
985 : * not allowed to return both a lossy page pointer and exact (regular)
986 : * item pointers for the same page. (Doing so would break the key-combination
987 : * logic in scanGetItem.)
988 : */
989 : static void
3357 heikki.linnakangas 990 501146 : keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
991 : ItemPointerData advancePast, Snapshot snapshot)
992 : {
993 : ItemPointerData minItem;
994 : ItemPointerData curPageLossy;
995 : uint32 i;
996 : bool haveLossyEntry;
997 : GinScanEntry entry;
998 : GinTernaryValue res;
999 : MemoryContext oldCtx;
1000 : bool allFinished;
1001 :
4635 tgl 1002 501146 : Assert(!key->isFinished);
1003 :
1004 : /*
1005 : * We might have already tested this item; if so, no need to repeat work.
1006 : * (Note: the ">" case can happen, if advancePast is exact but we
1007 : * previously had to set curItem to a lossy-page pointer.)
1008 : */
3357 heikki.linnakangas 1009 501146 : if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
1010 799 : return;
1011 :
1012 : /*
1013 : * Find the minimum item > advancePast among the active entry streams.
1014 : *
1015 : * Note: a lossy-page entry is encoded by a ItemPointer with max value for
1016 : * offset (0xffff), so that it will sort after any exact entries for the
1017 : * same page. So we'll prefer to return exact pointers not lossy
1018 : * pointers, which is good.
1019 : */
4474 tgl 1020 501135 : ItemPointerSetMax(&minItem);
3357 heikki.linnakangas 1021 501135 : allFinished = true;
3348 1022 1357742 : for (i = 0; i < key->nrequired; i++)
1023 : {
1024 856607 : entry = key->requiredEntries[i];
1025 :
1026 856607 : if (entry->isFinished)
1027 262844 : continue;
1028 :
1029 : /*
1030 : * Advance this stream if necessary.
1031 : *
1032 : * In particular, since entry->curItem was initialized with
1033 : * ItemPointerSetMin, this ensures we fetch the first item for each
1034 : * entry on the first call.
1035 : */
1036 593763 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1037 : {
2557 kgrittn 1038 514820 : entryGetItem(ginstate, entry, advancePast, snapshot);
3348 heikki.linnakangas 1039 514820 : if (entry->isFinished)
1040 934 : continue;
1041 : }
1042 :
1043 592829 : allFinished = false;
1044 592829 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1045 548728 : minItem = entry->curItem;
1046 : }
1047 :
1177 akorotkov 1048 501135 : if (allFinished && !key->excludeOnly)
1049 : {
1050 : /* all entries are finished */
2062 peter_e 1051 788 : key->isFinished = true;
4474 tgl 1052 788 : return;
1053 : }
1054 :
1177 akorotkov 1055 500347 : if (!key->excludeOnly)
1056 : {
1057 : /*
1058 : * For a normal scan key, we now know there are no matches < minItem.
1059 : *
1060 : * If minItem is lossy, it means that there were no exact items on the
1061 : * page among requiredEntries, because lossy pointers sort after exact
1062 : * items. However, there might be exact items for the same page among
1063 : * additionalEntries, so we mustn't advance past them.
1064 : */
1065 498109 : if (ItemPointerIsLossyPage(&minItem))
1066 : {
1177 akorotkov 1067 UBC 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1068 0 : GinItemPointerGetBlockNumber(&minItem))
1069 : {
1070 0 : ItemPointerSet(&advancePast,
1071 : GinItemPointerGetBlockNumber(&minItem),
1072 : InvalidOffsetNumber);
1073 : }
1074 : }
1075 : else
1076 : {
1177 akorotkov 1077 CBC 498109 : Assert(GinItemPointerGetOffsetNumber(&minItem) > 0);
2203 alvherre 1078 498109 : ItemPointerSet(&advancePast,
1079 : GinItemPointerGetBlockNumber(&minItem),
1177 akorotkov 1080 498109 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&minItem)));
1081 : }
1082 : }
1083 : else
1084 : {
1085 : /*
1086 : * excludeOnly scan keys don't have any entries that are necessarily
1087 : * present in matching items. So, we consider the item just after
1088 : * advancePast.
1089 : */
1090 2238 : Assert(key->nrequired == 0);
1091 2238 : ItemPointerSet(&minItem,
1092 : GinItemPointerGetBlockNumber(&advancePast),
1093 2238 : OffsetNumberNext(GinItemPointerGetOffsetNumber(&advancePast)));
1094 : }
1095 :
1096 : /*
1097 : * We might not have loaded all the entry streams for this TID yet. We
1098 : * could call the consistent function, passing MAYBE for those entries, to
1099 : * see if it can decide if this TID matches based on the information we
1100 : * have. But if the consistent-function is expensive, and cannot in fact
1101 : * decide with partial information, that could be a big loss. So, load all
1102 : * the additional entries, before calling the consistent function.
1103 : */
3348 heikki.linnakangas 1104 533677 : for (i = 0; i < key->nadditional; i++)
1105 : {
1106 33330 : entry = key->additionalEntries[i];
1107 :
1108 33330 : if (entry->isFinished)
1109 206 : continue;
1110 :
1111 33124 : if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1112 : {
2557 kgrittn 1113 26878 : entryGetItem(ginstate, entry, advancePast, snapshot);
3348 heikki.linnakangas 1114 26878 : if (entry->isFinished)
1115 89 : continue;
1116 : }
1117 :
1118 : /*
1119 : * Normally, none of the items in additionalEntries can have a curItem
1120 : * larger than minItem. But if minItem is a lossy page, then there
1121 : * might be exact items on the same page among additionalEntries.
1122 : */
1123 33035 : if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
1124 : {
3348 heikki.linnakangas 1125 UBC 0 : Assert(ItemPointerIsLossyPage(&minItem));
1126 0 : minItem = entry->curItem;
1127 : }
1128 : }
1129 :
1130 : /*
1131 : * Ok, we've advanced all the entries up to minItem now. Set key->curItem,
1132 : * and perform consistentFn test.
1133 : *
1134 : * Lossy-page entries pose a problem, since we don't know the correct
1135 : * entryRes state to pass to the consistentFn, and we also don't know what
1136 : * its combining logic will be (could be AND, OR, or even NOT). If the
1137 : * logic is OR then the consistentFn might succeed for all items in the
1138 : * lossy page even when none of the other entries match.
1139 : *
1140 : * Our strategy is to call the tri-state consistent function, with the
1141 : * lossy-page entries set to MAYBE, and all the other entries FALSE. If it
1142 : * returns FALSE, none of the lossy items alone are enough for a match, so
1143 : * we don't need to return a lossy-page pointer. Otherwise, return a
1144 : * lossy-page pointer to indicate that the whole heap page must be
1145 : * checked. (On subsequent calls, we'll do nothing until minItem is past
1146 : * the page altogether, thus ensuring that we never return both regular
1147 : * and lossy pointers for the same page.)
1148 : *
1149 : * An exception is that it doesn't matter what we pass for lossy pointers
1150 : * in "hidden" entries, because the consistentFn's result can't depend on
1151 : * them. We could pass them as MAYBE as well, but if we're using the
1152 : * "shim" implementation of a tri-state consistent function (see
1153 : * ginlogic.c), it's better to pass as few MAYBEs as possible. So pass
1154 : * them as true.
1155 : *
1156 : * Note that only lossy-page entries pointing to the current item's page
1157 : * should trigger this processing; we might have future lossy pages in the
1158 : * entry array, but they aren't relevant yet.
1159 : */
3348 heikki.linnakangas 1160 CBC 500347 : key->curItem = minItem;
4474 tgl 1161 500347 : ItemPointerSetLossyPage(&curPageLossy,
1162 : GinItemPointerGetBlockNumber(&key->curItem));
1163 500347 : haveLossyEntry = false;
1164 1388854 : for (i = 0; i < key->nentries; i++)
1165 : {
1166 888507 : entry = key->scanEntry[i];
2062 peter_e 1167 1514371 : if (entry->isFinished == false &&
4474 tgl 1168 625864 : ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
1169 : {
3348 heikki.linnakangas 1170 UBC 0 : if (i < key->nuserentries)
1171 0 : key->entryRes[i] = GIN_MAYBE;
1172 : else
1173 0 : key->entryRes[i] = GIN_TRUE;
4474 tgl 1174 0 : haveLossyEntry = true;
1175 : }
1176 : else
3348 heikki.linnakangas 1177 CBC 888507 : key->entryRes[i] = GIN_FALSE;
1178 : }
1179 :
1180 : /* prepare for calling consistentFn in temp context */
4474 tgl 1181 500347 : oldCtx = MemoryContextSwitchTo(tempCtx);
1182 :
1183 500347 : if (haveLossyEntry)
1184 : {
1185 : /* Have lossy-page entries, so see if whole page matches */
3348 heikki.linnakangas 1186 UBC 0 : res = key->triConsistentFn(key);
1187 :
1188 0 : if (res == GIN_TRUE || res == GIN_MAYBE)
1189 : {
1190 : /* Yes, so clean up ... */
4474 tgl 1191 0 : MemoryContextSwitchTo(oldCtx);
1192 0 : MemoryContextReset(tempCtx);
1193 :
1194 : /* and return lossy pointer for whole page */
1195 0 : key->curItem = curPageLossy;
1196 0 : key->curItemMatches = true;
1197 0 : key->recheckCurItem = true;
1198 0 : return;
1199 : }
1200 : }
1201 :
1202 : /*
1203 : * At this point we know that we don't need to return a lossy whole-page
1204 : * pointer, but we might have matches for individual exact item pointers,
1205 : * possibly in combination with a lossy pointer. Pass lossy pointers as
1206 : * MAYBE to the ternary consistent function, to let it decide if this
1207 : * tuple satisfies the overall key, even though we don't know if the lossy
1208 : * entries match.
1209 : *
1210 : * Prepare entryRes array to be passed to consistentFn.
1211 : */
4474 tgl 1212 CBC 1388854 : for (i = 0; i < key->nentries; i++)
1213 : {
1214 888507 : entry = key->scanEntry[i];
3348 heikki.linnakangas 1215 888507 : if (entry->isFinished)
1216 262643 : key->entryRes[i] = GIN_FALSE;
1217 : #if 0
1218 :
1219 : /*
1220 : * This case can't currently happen, because we loaded all the entries
1221 : * for this item earlier.
1222 : */
1223 : else if (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
1224 : key->entryRes[i] = GIN_MAYBE;
1225 : #endif
1226 625864 : else if (ginCompareItemPointers(&entry->curItem, &curPageLossy) == 0)
3348 heikki.linnakangas 1227 UBC 0 : key->entryRes[i] = GIN_MAYBE;
3348 heikki.linnakangas 1228 CBC 625864 : else if (ginCompareItemPointers(&entry->curItem, &minItem) == 0)
1229 535537 : key->entryRes[i] = GIN_TRUE;
1230 : else
1231 90327 : key->entryRes[i] = GIN_FALSE;
1232 : }
1233 :
1234 500347 : res = key->triConsistentFn(key);
1235 :
1236 500347 : switch (res)
1237 : {
1238 432523 : case GIN_TRUE:
1239 432523 : key->curItemMatches = true;
1240 : /* triConsistentFn set recheckCurItem */
1241 432523 : break;
1242 :
1243 9013 : case GIN_FALSE:
1244 9013 : key->curItemMatches = false;
1245 9013 : break;
1246 :
1247 58811 : case GIN_MAYBE:
1248 58811 : key->curItemMatches = true;
1249 58811 : key->recheckCurItem = true;
1250 58811 : break;
1251 :
3348 heikki.linnakangas 1252 UBC 0 : default:
1253 :
1254 : /*
1255 : * the 'default' case shouldn't happen, but if the consistent
1256 : * function returns something bogus, this is the safe result
1257 : */
1258 0 : key->curItemMatches = true;
1259 0 : key->recheckCurItem = true;
1260 0 : break;
1261 : }
1262 :
1263 : /*
1264 : * We have a tuple, and we know if it matches or not. If it's a non-match,
1265 : * we could continue to find the next matching tuple, but let's break out
1266 : * and give scanGetItem a chance to advance the other keys. They might be
1267 : * able to skip past to a much higher TID, allowing us to save work.
1268 : */
1269 :
1270 : /* clean up after consistentFn calls */
4474 tgl 1271 CBC 500347 : MemoryContextSwitchTo(oldCtx);
1272 500347 : MemoryContextReset(tempCtx);
1273 : }
1274 :
1275 : /*
1276 : * Get next heap item pointer (after advancePast) from scan.
1277 : * Returns true if anything found.
1278 : * On success, *item and *recheck are set.
1279 : *
1280 : * Note: this is very nearly the same logic as in keyGetItem(), except
1281 : * that we know the keys are to be combined with AND logic, whereas in
1282 : * keyGetItem() the combination logic is known only to the consistentFn.
1283 : */
1284 : static bool
3357 heikki.linnakangas 1285 489836 : scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
1286 : ItemPointerData *item, bool *recheck)
1287 : {
4475 tgl 1288 489836 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1289 : uint32 i;
1290 : bool match;
1291 :
1292 : /*----------
1293 : * Advance the scan keys in lock-step, until we find an item that matches
1294 : * all the keys. If any key reports isFinished, meaning its subset of the
1295 : * entries is exhausted, we can stop. Otherwise, set *item to the next
1296 : * matching item.
1297 : *
1298 : * This logic works only if a keyGetItem stream can never contain both
1299 : * exact and lossy pointers for the same page. Else we could have a
1300 : * case like
1301 : *
1302 : * stream 1 stream 2
1303 : * ... ...
1304 : * 42/6 42/7
1305 : * 50/1 42/0xffff
1306 : * ... ...
1307 : *
1308 : * We would conclude that 42/6 is not a match and advance stream 1,
1309 : * thus never detecting the match to the lossy pointer in stream 2.
1310 : * (keyGetItem has a similar problem versus entryGetItem.)
1311 : *----------
1312 : */
1313 : do
1314 : {
3357 heikki.linnakangas 1315 498878 : ItemPointerSetMin(item);
1316 498878 : match = true;
1317 990223 : for (i = 0; i < so->nkeys && match; i++)
1318 : {
4475 tgl 1319 501146 : GinScanKey key = so->keys + i;
1320 :
1321 : /*
1322 : * If we're considering a lossy page, skip excludeOnly keys. They
1323 : * can't exclude the whole page anyway.
1324 : */
1177 akorotkov 1325 501146 : if (ItemPointerIsLossyPage(item) && key->excludeOnly)
1326 : {
1327 : /*
1328 : * ginNewScanKey() should never mark the first key as
1329 : * excludeOnly.
1330 : */
1177 akorotkov 1331 UBC 0 : Assert(i > 0);
1332 0 : continue;
1333 : }
1334 :
1335 : /* Fetch the next item for this key that is > advancePast. */
2557 kgrittn 1336 CBC 501146 : keyGetItem(&so->ginstate, so->tempCtx, key, advancePast,
1337 : scan->xs_snapshot);
1338 :
4475 tgl 1339 501146 : if (key->isFinished)
3357 heikki.linnakangas 1340 788 : return false;
1341 :
1342 : /*
1343 : * If it's not a match, we can immediately conclude that nothing
1344 : * <= this item matches, without checking the rest of the keys.
1345 : */
1346 500358 : if (!key->curItemMatches)
1347 : {
1348 9013 : advancePast = key->curItem;
1349 9013 : match = false;
1350 9013 : break;
1351 : }
1352 :
1353 : /*
1354 : * It's a match. We can conclude that nothing < matches, so the
1355 : * other key streams can skip to this item.
1356 : *
1357 : * Beware of lossy pointers, though; from a lossy pointer, we can
1358 : * only conclude that nothing smaller than this *block* matches.
1359 : */
1360 491345 : if (ItemPointerIsLossyPage(&key->curItem))
1361 : {
3357 heikki.linnakangas 1362 UBC 0 : if (GinItemPointerGetBlockNumber(&advancePast) <
1363 0 : GinItemPointerGetBlockNumber(&key->curItem))
1364 : {
2203 alvherre 1365 0 : ItemPointerSet(&advancePast,
2118 tgl 1366 0 : GinItemPointerGetBlockNumber(&key->curItem),
1367 : InvalidOffsetNumber);
1368 : }
1369 : }
1370 : else
1371 : {
2203 alvherre 1372 CBC 491345 : Assert(GinItemPointerGetOffsetNumber(&key->curItem) > 0);
1373 491345 : ItemPointerSet(&advancePast,
1374 491345 : GinItemPointerGetBlockNumber(&key->curItem),
1375 491345 : OffsetNumberPrev(GinItemPointerGetOffsetNumber(&key->curItem)));
1376 : }
1377 :
1378 : /*
1379 : * If this is the first key, remember this location as a potential
1380 : * match, and proceed to check the rest of the keys.
1381 : *
1382 : * Otherwise, check if this is the same item that we checked the
1383 : * previous keys for (or a lossy pointer for the same page). If
1384 : * not, loop back to check the previous keys for this item (we
1385 : * will check this key again too, but keyGetItem returns quickly
1386 : * for that)
1387 : */
3357 heikki.linnakangas 1388 491345 : if (i == 0)
1389 : {
1390 489130 : *item = key->curItem;
1391 : }
1392 : else
1393 : {
1394 4430 : if (ItemPointerIsLossyPage(&key->curItem) ||
1395 2215 : ItemPointerIsLossyPage(item))
1396 : {
3260 bruce 1397 UBC 0 : Assert(GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
3357 heikki.linnakangas 1398 0 : match = (GinItemPointerGetBlockNumber(&key->curItem) ==
1399 0 : GinItemPointerGetBlockNumber(item));
1400 : }
1401 : else
1402 : {
3357 heikki.linnakangas 1403 CBC 2215 : Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
1404 2215 : match = (ginCompareItemPointers(&key->curItem, item) == 0);
1405 : }
1406 : }
1407 : }
1408 498090 : } while (!match);
1409 :
1410 489048 : Assert(!ItemPointerIsMin(item));
1411 :
1412 : /*
1413 : * Now *item contains the first ItemPointer after previous result that
1414 : * satisfied all the keys for that exact TID, or a lossy reference to the
1415 : * same page.
1416 : *
1417 : * We must return recheck = true if any of the keys are marked recheck.
1418 : */
4475 tgl 1419 489048 : *recheck = false;
1420 912948 : for (i = 0; i < so->nkeys; i++)
1421 : {
1422 489234 : GinScanKey key = so->keys + i;
1423 :
1424 489234 : if (key->recheckCurItem)
1425 : {
1426 65334 : *recheck = true;
1427 65334 : break;
1428 : }
1429 : }
1430 :
2062 peter_e 1431 489048 : return true;
1432 : }
1433 :
1434 :
1435 : /*
1436 : * Functions for scanning the pending list
1437 : */
1438 :
1439 :
1440 : /*
1441 : * Get ItemPointer of next heap row to be checked from pending list.
1442 : * Returns false if there are no more. On pages with several heap rows
1443 : * it returns each row separately, on page with part of heap row returns
1444 : * per page data. pos->firstOffset and pos->lastOffset are set to identify
1445 : * the range of pending-list tuples belonging to this heap row.
1446 : *
1447 : * The pendingBuffer is presumed pinned and share-locked on entry, and is
1448 : * pinned and share-locked on success exit. On failure exit it's released.
1449 : */
1450 : static bool
5129 tgl 1451 807 : scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
1452 : {
1453 : OffsetNumber maxoff;
1454 : Page page;
1455 : IndexTuple itup;
1456 :
5050 bruce 1457 807 : ItemPointerSetInvalid(&pos->item);
1458 : for (;;)
1459 : {
2545 kgrittn 1460 807 : page = BufferGetPage(pos->pendingBuffer);
1461 807 : TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1462 :
5129 tgl 1463 807 : maxoff = PageGetMaxOffsetNumber(page);
5050 bruce 1464 807 : if (pos->firstOffset > maxoff)
1465 : {
5129 tgl 1466 83 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
1467 :
5050 bruce 1468 83 : if (blkno == InvalidBlockNumber)
1469 : {
5129 tgl 1470 83 : UnlockReleaseBuffer(pos->pendingBuffer);
5050 bruce 1471 83 : pos->pendingBuffer = InvalidBuffer;
1472 :
5129 tgl 1473 83 : return false;
1474 : }
1475 : else
1476 : {
1477 : /*
1478 : * Here we must prevent deletion of next page by insertcleanup
1479 : * process, which may be trying to obtain exclusive lock on
1480 : * current page. So, we lock next page before releasing the
1481 : * current one
1482 : */
5050 bruce 1483 UBC 0 : Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
1484 :
5129 tgl 1485 0 : LockBuffer(tmpbuf, GIN_SHARE);
1486 0 : UnlockReleaseBuffer(pos->pendingBuffer);
1487 :
1488 0 : pos->pendingBuffer = tmpbuf;
1489 0 : pos->firstOffset = FirstOffsetNumber;
1490 : }
1491 : }
1492 : else
1493 : {
5129 tgl 1494 CBC 724 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
1495 724 : pos->item = itup->t_tid;
5050 bruce 1496 724 : if (GinPageHasFullRow(page))
1497 : {
1498 : /*
1499 : * find itempointer to the next row
1500 : */
1501 1677 : for (pos->lastOffset = pos->firstOffset + 1; pos->lastOffset <= maxoff; pos->lastOffset++)
1502 : {
5129 tgl 1503 1594 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
1504 1594 : if (!ItemPointerEquals(&pos->item, &itup->t_tid))
1505 641 : break;
1506 : }
1507 : }
1508 : else
1509 : {
1510 : /*
1511 : * All itempointers are the same on this page
1512 : */
5129 tgl 1513 UBC 0 : pos->lastOffset = maxoff + 1;
1514 : }
1515 :
1516 : /*
1517 : * Now pos->firstOffset points to the first tuple of current heap
1518 : * row, pos->lastOffset points to the first tuple of next heap row
1519 : * (or to the end of page)
1520 : */
5129 tgl 1521 CBC 724 : break;
1522 : }
1523 : }
1524 :
1525 724 : return true;
1526 : }
1527 :
1528 : /*
1529 : * Scan pending-list page from current tuple (off) up till the first of:
1530 : * - match is found (then returns true)
1531 : * - no later match is possible
1532 : * - tuple's attribute number is not equal to entry's attrnum
1533 : * - reach end of page
1534 : *
1535 : * datum[]/category[]/datumExtracted[] arrays are used to cache the results
1536 : * of gintuple_get_key() on the current page.
1537 : */
1538 : static bool
5129 tgl 1539 UBC 0 : matchPartialInPendingList(GinState *ginstate, Page page,
1540 : OffsetNumber off, OffsetNumber maxoff,
1541 : GinScanEntry entry,
1542 : Datum *datum, GinNullCategory *category,
1543 : bool *datumExtracted)
1544 : {
1545 : IndexTuple itup;
1546 : int32 cmp;
1547 :
1548 : /* Partial match to a null is not possible */
4475 1549 0 : if (entry->queryCategory != GIN_CAT_NORM_KEY)
1550 0 : return false;
1551 :
5050 bruce 1552 0 : while (off < maxoff)
1553 : {
5129 tgl 1554 0 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
1555 :
4475 1556 0 : if (gintuple_get_attrnum(ginstate, itup) != entry->attnum)
5129 1557 0 : return false;
1558 :
5050 bruce 1559 0 : if (datumExtracted[off - 1] == false)
1560 : {
4475 tgl 1561 0 : datum[off - 1] = gintuple_get_key(ginstate, itup,
1562 0 : &category[off - 1]);
5050 bruce 1563 0 : datumExtracted[off - 1] = true;
1564 : }
1565 :
1566 : /* Once we hit nulls, no further match is possible */
4475 tgl 1567 0 : if (category[off - 1] != GIN_CAT_NORM_KEY)
1568 0 : return false;
1569 :
1570 : /*----------
1571 : * Check partial match.
1572 : * case cmp == 0 => match
1573 : * case cmp > 0 => not match and end scan (no later match possible)
1574 : * case cmp < 0 => not match and continue scan
1575 : *----------
1576 : */
4380 1577 0 : cmp = DatumGetInt32(FunctionCall4Coll(&ginstate->comparePartialFn[entry->attnum - 1],
2118 1578 0 : ginstate->supportCollation[entry->attnum - 1],
1579 : entry->queryKey,
4380 1580 0 : datum[off - 1],
4322 bruce 1581 0 : UInt16GetDatum(entry->strategy),
2118 tgl 1582 0 : PointerGetDatum(entry->extra_data)));
5128 1583 0 : if (cmp == 0)
5129 1584 0 : return true;
5128 1585 0 : else if (cmp > 0)
5129 1586 0 : return false;
1587 :
5117 teodor 1588 0 : off++;
1589 : }
1590 :
5129 tgl 1591 0 : return false;
1592 : }
1593 :
1594 : /*
1595 : * Set up the entryRes array for each key by looking at
1596 : * every entry for current heap row in pending list.
1597 : *
1598 : * Returns true if each scan key has at least one entryRes match.
1599 : * This corresponds to the situations where the normal index search will
1600 : * try to apply the key's consistentFn. (A tuple not meeting that requirement
1601 : * cannot be returned by the normal search since no entry stream will
1602 : * source its TID.)
1603 : *
1604 : * The pendingBuffer is presumed pinned and share-locked on entry.
1605 : */
1606 : static bool
4475 tgl 1607 CBC 724 : collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos)
1608 : {
5050 bruce 1609 724 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1610 : OffsetNumber attrnum;
1611 : Page page;
1612 : IndexTuple itup;
1613 : int i,
1614 : j;
1615 :
1616 : /*
1617 : * Reset all entryRes and hasMatchKey flags
1618 : */
5129 tgl 1619 1962 : for (i = 0; i < so->nkeys; i++)
1620 : {
1621 1238 : GinScanKey key = so->keys + i;
1622 :
3348 heikki.linnakangas 1623 1238 : memset(key->entryRes, GIN_FALSE, key->nentries);
1624 : }
2062 peter_e 1625 724 : memset(pos->hasMatchKey, false, so->nkeys);
1626 :
1627 : /*
1628 : * Outer loop iterates over multiple pending-list pages when a single heap
1629 : * row has entries spanning those pages.
1630 : */
1631 : for (;;)
5129 tgl 1632 UBC 0 : {
1633 : Datum datum[BLCKSZ / sizeof(IndexTupleData)];
1634 : GinNullCategory category[BLCKSZ / sizeof(IndexTupleData)];
1635 : bool datumExtracted[BLCKSZ / sizeof(IndexTupleData)];
1636 :
5050 bruce 1637 CBC 724 : Assert(pos->lastOffset > pos->firstOffset);
4475 tgl 1638 724 : memset(datumExtracted + pos->firstOffset - 1, 0,
1639 724 : sizeof(bool) * (pos->lastOffset - pos->firstOffset));
1640 :
2545 kgrittn 1641 724 : page = BufferGetPage(pos->pendingBuffer);
1642 724 : TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
1643 :
5050 bruce 1644 1962 : for (i = 0; i < so->nkeys; i++)
1645 : {
1646 1238 : GinScanKey key = so->keys + i;
1647 :
1648 2388 : for (j = 0; j < key->nentries; j++)
1649 : {
4474 tgl 1650 1150 : GinScanEntry entry = key->scanEntry[j];
5050 bruce 1651 1150 : OffsetNumber StopLow = pos->firstOffset,
1652 1150 : StopHigh = pos->lastOffset,
1653 : StopMiddle;
1654 :
1655 : /* If already matched on earlier page, do no extra work */
1656 1150 : if (key->entryRes[j])
5129 tgl 1657 UBC 0 : continue;
1658 :
1659 : /*
1660 : * Interesting tuples are from pos->firstOffset to
1661 : * pos->lastOffset and they are ordered by (attnum, Datum) as
1662 : * it's done in entry tree. So we can use binary search to
1663 : * avoid linear scanning.
1664 : */
5129 tgl 1665 CBC 2554 : while (StopLow < StopHigh)
1666 : {
1667 : int res;
1668 :
1669 2005 : StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
1670 :
1671 2005 : itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
1672 :
1673 2005 : attrnum = gintuple_get_attrnum(&so->ginstate, itup);
1674 :
1675 2005 : if (key->attnum < attrnum)
1676 : {
1677 399 : StopHigh = StopMiddle;
4475 1678 399 : continue;
1679 : }
1680 1606 : if (key->attnum > attrnum)
1681 : {
5129 1682 330 : StopLow = StopMiddle + 1;
4475 1683 330 : continue;
1684 : }
1685 :
1686 1276 : if (datumExtracted[StopMiddle - 1] == false)
1687 : {
1688 2476 : datum[StopMiddle - 1] =
1689 1238 : gintuple_get_key(&so->ginstate, itup,
1690 1238 : &category[StopMiddle - 1]);
1691 1238 : datumExtracted[StopMiddle - 1] = true;
1692 : }
1693 :
1694 1276 : if (entry->queryCategory == GIN_CAT_EMPTY_QUERY)
1695 : {
1696 : /* special behavior depending on searchMode */
1697 542 : if (entry->searchMode == GIN_SEARCH_MODE_ALL)
1698 : {
1699 : /* match anything except NULL_ITEM */
1700 542 : if (category[StopMiddle - 1] == GIN_CAT_NULL_ITEM)
1701 189 : res = -1;
1702 : else
1703 353 : res = 0;
1704 : }
1705 : else
1706 : {
1707 : /* match everything */
4475 tgl 1708 UBC 0 : res = 0;
1709 : }
1710 : }
1711 : else
1712 : {
4557 tgl 1713 CBC 734 : res = ginCompareEntries(&so->ginstate,
1714 734 : entry->attnum,
1715 : entry->queryKey,
4475 1716 734 : entry->queryCategory,
1717 734 : datum[StopMiddle - 1],
1718 734 : category[StopMiddle - 1]);
1719 : }
1720 :
1721 1276 : if (res == 0)
1722 : {
1723 : /*
1724 : * Found exact match (there can be only one, except in
1725 : * EMPTY_QUERY mode).
1726 : *
1727 : * If doing partial match, scan forward from here to
1728 : * end of page to check for matches.
1729 : *
1730 : * See comment above about tuple's ordering.
1731 : */
1732 601 : if (entry->isPartialMatch)
4475 tgl 1733 UBC 0 : key->entryRes[j] =
1734 0 : matchPartialInPendingList(&so->ginstate,
1735 : page,
1736 : StopMiddle,
1737 0 : pos->lastOffset,
1738 : entry,
1739 : datum,
1740 : category,
1741 : datumExtracted);
1742 : else
4475 tgl 1743 CBC 601 : key->entryRes[j] = true;
1744 :
1745 : /* done with binary search */
1746 601 : break;
1747 : }
1748 675 : else if (res < 0)
1749 657 : StopHigh = StopMiddle;
1750 : else
1751 18 : StopLow = StopMiddle + 1;
1752 : }
1753 :
5050 bruce 1754 1150 : if (StopLow >= StopHigh && entry->isPartialMatch)
1755 : {
1756 : /*
1757 : * No exact match on this page. If doing partial match,
1758 : * scan from the first tuple greater than target value to
1759 : * end of page. Note that since we don't remember whether
1760 : * the comparePartialFn told us to stop early on a
1761 : * previous page, we will uselessly apply comparePartialFn
1762 : * to the first tuple on each subsequent page.
1763 : */
5129 tgl 1764 UBC 0 : key->entryRes[j] =
1765 0 : matchPartialInPendingList(&so->ginstate,
1766 : page,
1767 : StopHigh,
1768 0 : pos->lastOffset,
1769 : entry,
1770 : datum,
1771 : category,
1772 : datumExtracted);
1773 : }
1774 :
4895 teodor 1775 CBC 1150 : pos->hasMatchKey[i] |= key->entryRes[j];
1776 : }
1777 : }
1778 :
1779 : /* Advance firstOffset over the scanned tuples */
5129 tgl 1780 724 : pos->firstOffset = pos->lastOffset;
1781 :
5050 bruce 1782 724 : if (GinPageHasFullRow(page))
1783 : {
1784 : /*
1785 : * We have examined all pending entries for the current heap row.
1786 : * Break out of loop over pages.
1787 : */
4475 tgl 1788 724 : break;
1789 : }
1790 : else
1791 : {
1792 : /*
1793 : * Advance to next page of pending entries for the current heap
1794 : * row. Complain if there isn't one.
1795 : */
4475 tgl 1796 UBC 0 : ItemPointerData item = pos->item;
1797 :
1798 0 : if (scanGetCandidate(scan, pos) == false ||
1799 0 : !ItemPointerEquals(&pos->item, &item))
1800 0 : elog(ERROR, "could not find additional pending pages for same heap tuple");
1801 : }
1802 : }
1803 :
1804 : /*
1805 : * All scan keys except excludeOnly require at least one entry to match.
1806 : * excludeOnly keys are an exception, because their implied
1807 : * GIN_CAT_EMPTY_QUERY scanEntry always matches. So return "true" if all
1808 : * non-excludeOnly scan keys have at least one match.
1809 : */
4475 tgl 1810 CBC 1269 : for (i = 0; i < so->nkeys; i++)
1811 : {
1177 akorotkov 1812 992 : if (pos->hasMatchKey[i] == false && !so->keys[i].excludeOnly)
4475 tgl 1813 447 : return false;
1814 : }
1815 :
1816 277 : return true;
1817 : }
1818 :
1819 : /*
1820 : * Collect all matched rows from pending list into bitmap.
1821 : */
1822 : static void
5129 1823 788 : scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
1824 : {
1825 788 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1826 : MemoryContext oldCtx;
1827 : bool recheck,
1828 : match;
1829 : int i;
1830 : pendingPosition pos;
5050 bruce 1831 788 : Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
1832 : Page page;
1833 : BlockNumber blkno;
1834 :
5129 tgl 1835 788 : *ntids = 0;
1836 :
1837 : /*
1838 : * Acquire predicate lock on the metapage, to conflict with any fastupdate
1839 : * insertions.
1840 : */
1801 teodor 1841 788 : PredicateLockPage(scan->indexRelation, GIN_METAPAGE_BLKNO, scan->xs_snapshot);
1842 :
5129 tgl 1843 788 : LockBuffer(metabuffer, GIN_SHARE);
2545 kgrittn 1844 788 : page = BufferGetPage(metabuffer);
1845 788 : TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
2557 1846 788 : blkno = GinPageGetMeta(page)->head;
1847 :
1848 : /*
1849 : * fetch head of list before unlocking metapage. head page must be pinned
1850 : * to prevent deletion by vacuum process
1851 : */
5050 bruce 1852 788 : if (blkno == InvalidBlockNumber)
1853 : {
1854 : /* No pending list, so proceed with normal scan */
1855 705 : UnlockReleaseBuffer(metabuffer);
5129 tgl 1856 705 : return;
1857 : }
1858 :
1859 83 : pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
1860 83 : LockBuffer(pos.pendingBuffer, GIN_SHARE);
1861 83 : pos.firstOffset = FirstOffsetNumber;
5050 bruce 1862 83 : UnlockReleaseBuffer(metabuffer);
4895 teodor 1863 83 : pos.hasMatchKey = palloc(sizeof(bool) * so->nkeys);
1864 :
1865 : /*
1866 : * loop for each heap row. scanGetCandidate returns full row or row's
1867 : * tuples from first page.
1868 : */
5050 bruce 1869 807 : while (scanGetCandidate(scan, &pos))
1870 : {
1871 : /*
1872 : * Check entries in tuple and set up entryRes array.
1873 : *
1874 : * If pending tuples belonging to the current heap row are spread
1875 : * across several pages, collectMatchesForHeapRow will read all of
1876 : * those pages.
1877 : */
4475 tgl 1878 724 : if (!collectMatchesForHeapRow(scan, &pos))
5129 1879 447 : continue;
1880 :
1881 : /*
1882 : * Matching of entries of one row is finished, so check row using
1883 : * consistent functions.
1884 : */
1885 277 : oldCtx = MemoryContextSwitchTo(so->tempCtx);
1886 277 : recheck = false;
1887 277 : match = true;
1888 :
5128 1889 702 : for (i = 0; i < so->nkeys; i++)
1890 : {
5050 bruce 1891 425 : GinScanKey key = so->keys + i;
1892 :
3348 heikki.linnakangas 1893 425 : if (!key->boolConsistentFn(key))
1894 : {
5129 tgl 1895 UBC 0 : match = false;
5128 1896 0 : break;
1897 : }
4634 tgl 1898 CBC 425 : recheck |= key->recheckCurItem;
1899 : }
1900 :
5129 1901 277 : MemoryContextSwitchTo(oldCtx);
1902 277 : MemoryContextReset(so->tempCtx);
1903 :
5050 bruce 1904 277 : if (match)
1905 : {
5129 tgl 1906 277 : tbm_add_tuples(tbm, &pos.item, 1, recheck);
1907 277 : (*ntids)++;
1908 : }
1909 : }
1910 :
4895 teodor 1911 83 : pfree(pos.hasMatchKey);
1912 : }
1913 :
1914 :
1915 : #define GinIsVoidRes(s) ( ((GinScanOpaque) scan->opaque)->isVoidRes )
1916 :
1917 : int64
2639 tgl 1918 794 : gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
1919 : {
2991 heikki.linnakangas 1920 794 : GinScanOpaque so = (GinScanOpaque) scan->opaque;
1921 : int64 ntids;
1922 : ItemPointerData iptr;
1923 : bool recheck;
1924 :
1925 : /*
1926 : * Set up the scan keys, and check for unsatisfiable query.
1927 : */
2878 bruce 1928 794 : ginFreeScanKeys(so); /* there should be no keys yet, but just to be
1929 : * sure */
2991 heikki.linnakangas 1930 794 : ginNewScanKey(scan);
1931 :
5912 teodor 1932 794 : if (GinIsVoidRes(scan))
2639 tgl 1933 6 : return 0;
1934 :
5129 1935 788 : ntids = 0;
1936 :
1937 : /*
1938 : * First, scan the pending list and collect any matching entries into the
1939 : * bitmap. After we scan a pending item, some other backend could post it
1940 : * into the main index, and so we might visit it a second time during the
1941 : * main scan. This is okay because we'll just re-set the same bit in the
1942 : * bitmap. (The possibility of duplicate visits is a major reason why GIN
1943 : * can't support the amgettuple API, however.) Note that it would not do
1944 : * to scan the main index before the pending list, since concurrent
1945 : * cleanup could then make us miss entries entirely.
1946 : */
1947 788 : scanPendingInsert(scan, tbm, &ntids);
1948 :
1949 : /*
1950 : * Now scan the main index.
1951 : */
5912 teodor 1952 788 : startScan(scan);
1953 :
4635 tgl 1954 788 : ItemPointerSetMin(&iptr);
1955 :
1956 : for (;;)
1957 : {
5477 1958 489836 : CHECK_FOR_INTERRUPTS();
1959 :
3357 heikki.linnakangas 1960 489836 : if (!scanGetItem(scan, iptr, &iptr, &recheck))
6186 teodor 1961 788 : break;
1962 :
5050 bruce 1963 489048 : if (ItemPointerIsLossyPage(&iptr))
5129 tgl 1964 UBC 0 : tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
1965 : else
5129 tgl 1966 CBC 489048 : tbm_add_tuples(tbm, &iptr, 1, recheck);
5477 1967 489048 : ntids++;
1968 : }
1969 :
2639 1970 788 : return ntids;
1971 : }
|