Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gist.c
4 : * interface routines for the postgres GiST index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gist.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/gist_private.h"
18 : #include "access/gistscan.h"
19 : #include "access/xloginsert.h"
20 : #include "catalog/pg_collation.h"
21 : #include "commands/vacuum.h"
22 : #include "miscadmin.h"
23 : #include "nodes/execnodes.h"
24 : #include "storage/lmgr.h"
25 : #include "storage/predicate.h"
26 : #include "utils/builtins.h"
27 : #include "utils/index_selfuncs.h"
28 : #include "utils/memutils.h"
29 : #include "utils/rel.h"
30 :
31 : /* non-export function prototypes */
32 : static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
33 : static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
34 : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
35 : static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
36 : GISTSTATE *giststate,
37 : IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
38 : Buffer leftchild, Buffer rightchild,
39 : bool unlockbuf, bool unlockleftchild);
40 : static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
41 : GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
42 : static void gistprunepage(Relation rel, Page page, Buffer buffer,
43 : Relation heapRel);
44 :
45 :
/*
 * Prepend a freshly zeroed SplitedPageLayout node to the list (d).
 * The new node becomes the list head; blkno/buffer start out invalid
 * and are filled in later by the split code.
 */
#define ROTATEDIST(d) do { \
	SplitedPageLayout *tmp=(SplitedPageLayout*)palloc0(sizeof(SplitedPageLayout)); \
	tmp->block.blkno = InvalidBlockNumber; \
	tmp->buffer = InvalidBuffer; \
	tmp->next = (d); \
	(d)=tmp; \
} while(0)
53 :
54 :
55 : /*
56 : * GiST handler function: return IndexAmRoutine with access method parameters
57 : * and callbacks.
58 : */
59 : Datum
2639 tgl 60 CBC 2179 : gisthandler(PG_FUNCTION_ARGS)
61 : {
62 2179 : IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
63 :
64 2179 : amroutine->amstrategies = 0;
2537 teodor 65 2179 : amroutine->amsupport = GISTNProcs;
1105 akorotkov 66 2179 : amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
2639 tgl 67 2179 : amroutine->amcanorder = false;
68 2179 : amroutine->amcanorderbyop = true;
69 2179 : amroutine->amcanbackward = false;
70 2179 : amroutine->amcanunique = false;
71 2179 : amroutine->amcanmulticol = true;
72 2179 : amroutine->amoptionalkey = true;
73 2179 : amroutine->amsearcharray = false;
74 2179 : amroutine->amsearchnulls = true;
75 2179 : amroutine->amstorage = true;
76 2179 : amroutine->amclusterable = true;
1839 teodor 77 2179 : amroutine->ampredlocks = true;
2244 rhaas 78 2179 : amroutine->amcanparallel = false;
1491 akorotkov 79 2179 : amroutine->amcaninclude = true;
1180 akapila 80 2179 : amroutine->amusemaintenanceworkmem = false;
20 tomas.vondra 81 GNC 2179 : amroutine->amsummarizing = false;
1180 akapila 82 CBC 2179 : amroutine->amparallelvacuumoptions =
1180 akapila 83 ECB : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
2639 tgl 84 GIC 2179 : amroutine->amkeytype = InvalidOid;
2639 tgl 85 ECB :
2639 tgl 86 GIC 2179 : amroutine->ambuild = gistbuild;
2639 tgl 87 CBC 2179 : amroutine->ambuildempty = gistbuildempty;
88 2179 : amroutine->aminsert = gistinsert;
89 2179 : amroutine->ambulkdelete = gistbulkdelete;
90 2179 : amroutine->amvacuumcleanup = gistvacuumcleanup;
91 2179 : amroutine->amcanreturn = gistcanreturn;
92 2179 : amroutine->amcostestimate = gistcostestimate;
93 2179 : amroutine->amoptions = gistoptions;
2430 94 2179 : amroutine->amproperty = gistproperty;
1468 alvherre 95 2179 : amroutine->ambuildphasename = NULL;
2639 tgl 96 2179 : amroutine->amvalidate = gistvalidate;
981 97 2179 : amroutine->amadjustmembers = gistadjustmembers;
2639 98 2179 : amroutine->ambeginscan = gistbeginscan;
99 2179 : amroutine->amrescan = gistrescan;
100 2179 : amroutine->amgettuple = gistgettuple;
101 2179 : amroutine->amgetbitmap = gistgetbitmap;
102 2179 : amroutine->amendscan = gistendscan;
103 2179 : amroutine->ammarkpos = NULL;
104 2179 : amroutine->amrestrpos = NULL;
2266 rhaas 105 2179 : amroutine->amestimateparallelscan = NULL;
106 2179 : amroutine->aminitparallelscan = NULL;
107 2179 : amroutine->amparallelrescan = NULL;
2639 tgl 108 ECB :
2639 tgl 109 GIC 2179 : PG_RETURN_POINTER(amroutine);
2639 tgl 110 ECB : }
111 :
112 : /*
113 : * Create and return a temporary memory context for use by GiST. We
114 : * _always_ invoke user-provided methods in a temporary memory
115 : * context, so that memory leaks in those functions cannot cause
116 : * problems. Also, we use some additional temporary contexts in the
117 : * GiST code itself, to avoid the need to do some awkward manual
118 : * memory management.
119 : */
120 : MemoryContext
6408 bruce 121 GIC 1929 : createTempGistContext(void)
6408 bruce 122 ECB : {
6408 bruce 123 GIC 1929 : return AllocSetContextCreate(CurrentMemoryContext,
6408 bruce 124 ECB : "GiST temporary context",
125 : ALLOCSET_DEFAULT_SIZES);
126 : }
127 :
128 : /*
129 : * gistbuildempty() -- build an empty gist index in the initialization fork
130 : */
void
gistbuildempty(Relation index)
{
	Buffer		buffer;

	/* Initialize the root page (block 0 of the init fork) */
	buffer = ExtendBufferedRel(EB_REL(index), INIT_FORKNUM, NULL,
							   EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);

	/*
	 * Initialize and xlog buffer.  The critical section ensures the page
	 * initialization, the dirty-marking, and the full-page WAL record are
	 * treated as one atomic action.
	 */
	START_CRIT_SECTION();
	GISTInitBuffer(buffer, F_LEAF);
	MarkBufferDirty(buffer);
	log_newpage_buffer(buffer, true);
	END_CRIT_SECTION();

	/* Unlock and release the buffer */
	UnlockReleaseBuffer(buffer);
}
4484 rhaas 150 ECB :
151 : /*
152 : * gistinsert -- wrapper for GiST tuple insertion.
153 : *
154 : * This is the public interface routine for tuple insertion in GiSTs.
155 : * It doesn't do any work; just locks the relation and passes the buck.
156 : */
bool
gistinsert(Relation r, Datum *values, bool *isnull,
		   ItemPointer ht_ctid, Relation heapRel,
		   IndexUniqueCheck checkUnique,
		   bool indexUnchanged,
		   IndexInfo *indexInfo)
{
	GISTSTATE  *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
	IndexTuple	itup;
	MemoryContext oldCxt;

	/*
	 * Initialize GISTSTATE cache if first call in this statement.  The
	 * cache is built in ii_Context so it survives until the IndexInfo
	 * itself goes away, rather than in the per-tuple context we use below.
	 */
	if (giststate == NULL)
	{
		oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
		giststate = initGISTstate(r);
		giststate->tempCxt = createTempGistContext();
		indexInfo->ii_AmCache = (void *) giststate;
		MemoryContextSwitchTo(oldCxt);
	}

	/* Do all per-tuple work in the temporary context, reset afterwards */
	oldCxt = MemoryContextSwitchTo(giststate->tempCxt);

	itup = gistFormTuple(giststate, r,
						 values, isnull, true /* size is currently bogus */ );
	itup->t_tid = *ht_ctid;

	gistdoinsert(r, itup, 0, giststate, heapRel, false);

	/* cleanup */
	MemoryContextSwitchTo(oldCxt);
	MemoryContextReset(giststate->tempCxt);

	/* no uniqueness checking was performed (GiST indexes are never unique) */
	return false;
}
192 :
193 :
194 : /*
195 : * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
196 : * at that offset is atomically removed along with inserting the new tuples.
197 : * This is used to replace a tuple with a new one.
198 : *
199 : * If 'leftchildbuf' is valid, we're inserting the downlink for the page
200 : * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
201 : * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
202 : *
203 : * If 'markfollowright' is true and the page is split, the left child is
204 : * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
205 : * index build, however, there is no concurrent access and the page splitting
206 : * is done in a slightly simpler fashion, and false is passed.
207 : *
208 : * If there is not enough room on the page, it is split. All the split
209 : * pages are kept pinned and locked and returned in *splitinfo, the caller
210 : * is responsible for inserting the downlinks for them. However, if
211 : * 'buffer' is the root page and it needs to be split, gistplacetopage()
212 : * performs the split as one atomic operation, and *splitinfo is set to NIL.
213 : * In that case, we continue to hold the root page locked, and the child
214 : * pages are released; note that new tuple(s) are *not* on the root page
215 : * but in one of the new child pages.
216 : *
217 : * If 'newblkno' is not NULL, returns the block number of page the first
218 : * new/updated tuple was inserted to. Usually it's the given page, but could
219 : * be its right sibling if the page was split.
220 : *
221 : * Returns 'true' if the page was split, 'false' otherwise.
222 : */
bool
gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
				Buffer buffer,
				IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
				BlockNumber *newblkno,
				Buffer leftchildbuf,
				List **splitinfo,
				bool markfollowright,
				Relation heapRel,
				bool is_build)
{
	BlockNumber blkno = BufferGetBlockNumber(buffer);
	Page		page = BufferGetPage(buffer);
	bool		is_leaf = (GistPageIsLeaf(page)) ? true : false;
	XLogRecPtr	recptr;
	bool		is_split;

	/*
	 * Refuse to modify a page that's incompletely split. This should not
	 * happen because we finish any incomplete splits while we walk down the
	 * tree. However, it's remotely possible that another concurrent inserter
	 * splits a parent page, and errors out before completing the split. We
	 * will just throw an error in that case, and leave any split we had in
	 * progress unfinished too. The next insert that comes along will clean up
	 * the mess.
	 */
	if (GistFollowRight(page))
		elog(ERROR, "concurrent GiST page split was incomplete");

	/* should never try to insert to a deleted page */
	Assert(!GistPageIsDeleted(page));

	*splitinfo = NIL;

	/*
	 * if isupdate, remove old key: This node's key has been modified, either
	 * because a child split occurred or because we needed to adjust our key
	 * for an insert in a child node. Therefore, remove the old version of
	 * this node's key.
	 *
	 * for WAL replay, in the non-split case we handle this by setting up a
	 * one-element todelete array; in the split case, it's handled implicitly
	 * because the tuple vector passed to gistSplit won't include this tuple.
	 */
	/* does the incoming tuple set fit without splitting the page? */
	is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);

	/*
	 * If leaf page is full, try at first to delete dead tuples. And then
	 * check again.
	 */
	if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
	{
		gistprunepage(rel, page, buffer, heapRel);
		is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
	}

	if (is_split)
	{
		/* no space for insertion */
		IndexTuple *itvec;
		int			tlen;
		SplitedPageLayout *dist = NULL,
				   *ptr;
		BlockNumber oldrlink = InvalidBlockNumber;
		GistNSN		oldnsn = 0;
		SplitedPageLayout rootpg;
		bool		is_rootsplit;
		int			npage;

		is_rootsplit = (blkno == GIST_ROOT_BLKNO);

		/*
		 * Form index tuples vector to split. If we're replacing an old tuple,
		 * remove the old version from the vector.
		 */
		itvec = gistextractpage(page, &tlen);
		if (OffsetNumberIsValid(oldoffnum))
		{
			/* on inner page we should remove old tuple */
			int			pos = oldoffnum - FirstOffsetNumber;

			tlen--;
			if (pos != tlen)
				memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
		}
		itvec = gistjoinvector(itvec, &tlen, itup, ntup);
		dist = gistSplit(rel, page, itvec, tlen, giststate);

		/*
		 * Check that split didn't produce too many pages.
		 */
		npage = 0;
		for (ptr = dist; ptr; ptr = ptr->next)
			npage++;
		/* in a root split, we'll add one more page to the list below */
		if (is_rootsplit)
			npage++;
		if (npage > GIST_MAX_SPLIT_PAGES)
			elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
				 npage, GIST_MAX_SPLIT_PAGES);

		/*
		 * Set up pages to work with. Allocate new buffers for all but the
		 * leftmost page. The original page becomes the new leftmost page, and
		 * is just replaced with the new contents.
		 *
		 * For a root-split, allocate new buffers for all child pages, the
		 * original page is overwritten with new root page containing
		 * downlinks to the new child pages.
		 */
		ptr = dist;
		if (!is_rootsplit)
		{
			/* save old rightlink and NSN */
			oldrlink = GistPageGetOpaque(page)->rightlink;
			oldnsn = GistPageGetNSN(page);

			dist->buffer = buffer;
			dist->block.blkno = BufferGetBlockNumber(buffer);
			/* work on a temp copy; installed over the real page in the
			 * critical section below */
			dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));

			/* clean all flags except F_LEAF */
			GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;

			ptr = ptr->next;
		}
		for (; ptr; ptr = ptr->next)
		{
			/* Allocate new page */
			ptr->buffer = gistNewBuffer(rel, heapRel);
			GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
			ptr->page = BufferGetPage(ptr->buffer);
			ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
			/* transfer predicate locks from the page being split */
			PredicateLockPageSplit(rel,
								   BufferGetBlockNumber(buffer),
								   BufferGetBlockNumber(ptr->buffer));
		}

		/*
		 * Now that we know which blocks the new pages go to, set up downlink
		 * tuples to point to them.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
			GistTupleSetValid(ptr->itup);
		}

		/*
		 * If this is a root split, we construct the new root page with the
		 * downlinks here directly, instead of requiring the caller to insert
		 * them. Add the new root page to the list along with the child pages.
		 */
		if (is_rootsplit)
		{
			IndexTuple *downlinks;
			int			ndownlinks = 0;
			int			i;

			rootpg.buffer = buffer;
			rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
			GistPageGetOpaque(rootpg.page)->flags = 0;

			/* Prepare a vector of all the downlinks */
			for (ptr = dist; ptr; ptr = ptr->next)
				ndownlinks++;
			downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
			for (i = 0, ptr = dist; ptr; ptr = ptr->next)
				downlinks[i++] = ptr->itup;

			rootpg.block.blkno = GIST_ROOT_BLKNO;
			rootpg.block.num = ndownlinks;
			rootpg.list = gistfillitupvec(downlinks, ndownlinks,
										  &(rootpg.lenlist));
			rootpg.itup = NULL;

			/* note: rootpg is stack-allocated; it is only used within this
			 * if-branch before the function returns */
			rootpg.next = dist;
			dist = &rootpg;
		}
		else
		{
			/* Prepare split-info to be returned to caller */
			for (ptr = dist; ptr; ptr = ptr->next)
			{
				GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));

				si->buf = ptr->buffer;
				si->downlink = ptr->itup;
				*splitinfo = lappend(*splitinfo, si);
			}
		}

		/*
		 * Fill all pages. All the pages are new, ie. freshly allocated empty
		 * pages, or a temporary copy of the old page.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
		{
			char	   *data = (char *) (ptr->list);

			for (int i = 0; i < ptr->block.num; i++)
			{
				IndexTuple	thistup = (IndexTuple) data;

				if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
					elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));

				/*
				 * If this is the first inserted/updated tuple, let the caller
				 * know which page it landed on.
				 */
				if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
					*newblkno = ptr->block.blkno;

				data += IndexTupleSize(thistup);
			}

			/* Set up rightlinks */
			if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
				GistPageGetOpaque(ptr->page)->rightlink =
					ptr->next->block.blkno;
			else
				GistPageGetOpaque(ptr->page)->rightlink = oldrlink;

			/*
			 * Mark the all but the right-most page with the follow-right
			 * flag. It will be cleared as soon as the downlink is inserted
			 * into the parent, but this ensures that if we error out before
			 * that, the index is still consistent. (in buffering build mode,
			 * any error will abort the index build anyway, so this is not
			 * needed.)
			 */
			if (ptr->next && !is_rootsplit && markfollowright)
				GistMarkFollowRight(ptr->page);
			else
				GistClearFollowRight(ptr->page);

			/*
			 * Copy the NSN of the original page to all pages. The
			 * F_FOLLOW_RIGHT flags ensure that scans will follow the
			 * rightlinks until the downlinks are inserted.
			 */
			GistPageSetNSN(ptr->page, oldnsn);
		}

		/*
		 * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
		 * insertion for that. NB: The number of pages and data segments
		 * specified here must match the calculations in gistXLogSplit()!
		 */
		if (!is_build && RelationNeedsWAL(rel))
			XLogEnsureRecordSpace(npage, 1 + npage * 2);

		START_CRIT_SECTION();

		/*
		 * Must mark buffers dirty before XLogInsert, even though we'll still
		 * be changing their opaque fields below.
		 */
		for (ptr = dist; ptr; ptr = ptr->next)
			MarkBufferDirty(ptr->buffer);
		if (BufferIsValid(leftchildbuf))
			MarkBufferDirty(leftchildbuf);

		/*
		 * The first page in the chain was a temporary working copy meant to
		 * replace the old page. Copy it over the old page.
		 */
		PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
		dist->page = BufferGetPage(dist->buffer);

		/*
		 * Write the WAL record.
		 *
		 * If we're building a new index, however, we don't WAL-log changes
		 * yet. The LSN-NSN interlock between parent and child requires that
		 * LSNs never move backwards, so set the LSNs to a value that's
		 * smaller than any real or fake unlogged LSN that might be generated
		 * later. (There can't be any concurrent scans during index build, so
		 * we don't need to be able to detect concurrent splits yet.)
		 */
		if (is_build)
			recptr = GistBuildLSN;
		else
		{
			if (RelationNeedsWAL(rel))
				recptr = gistXLogSplit(is_leaf,
									   dist, oldrlink, oldnsn, leftchildbuf,
									   markfollowright);
			else
				recptr = gistGetFakeLSN(rel);
		}

		for (ptr = dist; ptr; ptr = ptr->next)
			PageSetLSN(ptr->page, recptr);

		/*
		 * Return the new child buffers to the caller.
		 *
		 * If this was a root split, we've already inserted the downlink
		 * pointers, in the form of a new root page. Therefore we can release
		 * all the new buffers, and keep just the root page locked.
		 */
		if (is_rootsplit)
		{
			for (ptr = dist->next; ptr; ptr = ptr->next)
				UnlockReleaseBuffer(ptr->buffer);
		}
	}
	else
	{
		/*
		 * Enough space. We always get here if ntup==0.
		 */
		START_CRIT_SECTION();

		/*
		 * Delete old tuple if any, then insert new tuple(s) if any. If
		 * possible, use the fast path of PageIndexTupleOverwrite.
		 */
		if (OffsetNumberIsValid(oldoffnum))
		{
			if (ntup == 1)
			{
				/* One-for-one replacement, so use PageIndexTupleOverwrite */
				if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
											 IndexTupleSize(*itup)))
					elog(ERROR, "failed to add item to index page in \"%s\"",
						 RelationGetRelationName(rel));
			}
			else
			{
				/* Delete old, then append new tuple(s) to page */
				PageIndexTupleDelete(page, oldoffnum);
				gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
			}
		}
		else
		{
			/* Just append new tuples at the end of the page */
			gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
		}

		MarkBufferDirty(buffer);

		if (BufferIsValid(leftchildbuf))
			MarkBufferDirty(leftchildbuf);

		/* same LSN rules as in the split path above */
		if (is_build)
			recptr = GistBuildLSN;
		else
		{
			if (RelationNeedsWAL(rel))
			{
				OffsetNumber ndeloffs = 0,
							deloffs[1];

				if (OffsetNumberIsValid(oldoffnum))
				{
					deloffs[0] = oldoffnum;
					ndeloffs = 1;
				}

				recptr = gistXLogUpdate(buffer,
										deloffs, ndeloffs, itup, ntup,
										leftchildbuf);
			}
			else
				recptr = gistGetFakeLSN(rel);
		}
		PageSetLSN(page, recptr);

		if (newblkno)
			*newblkno = blkno;
	}

	/*
	 * If we inserted the downlink for a child page, set NSN and clear
	 * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
	 * follow the rightlink if and only if they looked at the parent page
	 * before we inserted the downlink.
	 *
	 * Note that we do this *after* writing the WAL record. That means that
	 * the possible full page image in the WAL record does not include these
	 * changes, and they must be replayed even if the page is restored from
	 * the full page image. There's a chicken-and-egg problem: if we updated
	 * the child pages first, we wouldn't know the recptr of the WAL record
	 * we're about to write.
	 */
	if (BufferIsValid(leftchildbuf))
	{
		Page		leftpg = BufferGetPage(leftchildbuf);

		GistPageSetNSN(leftpg, recptr);
		GistClearFollowRight(leftpg);

		PageSetLSN(leftpg, recptr);
	}

	END_CRIT_SECTION();

	return is_split;
}
626 :
627 : /*
628 : * Workhouse routine for doing insertion into a GiST index. Note that
629 : * this routine assumes it is invoked in a short-lived memory context,
630 : * so it does not bother releasing palloc'd allocations.
631 : */
632 : void
1570 akorotkov 633 455566 : gistdoinsert(Relation r, IndexTuple itup, Size freespace,
634 : GISTSTATE *giststate, Relation heapRel, bool is_build)
635 : {
636 : ItemId iid;
637 : IndexTuple idxtuple;
638 : GISTInsertStack firststack;
639 : GISTInsertStack *stack;
640 : GISTInsertState state;
4490 heikki.linnakangas 641 455566 : bool xlocked = false;
642 :
643 455566 : memset(&state, 0, sizeof(GISTInsertState));
644 455566 : state.freespace = freespace;
645 455566 : state.r = r;
1570 akorotkov 646 455566 : state.heapRel = heapRel;
1467 heikki.linnakangas 647 455566 : state.is_build = is_build;
648 :
649 : /* Start from the root */
4490 650 455566 : firststack.blkno = GIST_ROOT_BLKNO;
3941 651 455566 : firststack.lsn = 0;
1426 652 455566 : firststack.retry_from_parent = false;
4490 653 455566 : firststack.parent = NULL;
4286 654 455566 : firststack.downlinkoffnum = InvalidOffsetNumber;
4490 655 455566 : state.stack = stack = &firststack;
656 :
657 : /*
658 : * Walk down along the path of smallest penalty, updating the parent
659 : * pointers with the key we're inserting as we go. If we crash in the
660 : * middle, the tree is consistent, although the possible parent updates
661 : * were a waste.
662 : */
663 : for (;;)
664 : {
665 : /*
666 : * If we split an internal page while descending the tree, we have to
667 : * retry at the parent. (Normally, the LSN-NSN interlock below would
668 : * also catch this and cause us to retry. But LSNs are not updated
669 : * during index build.)
670 : */
1426 671 1007742 : while (stack->retry_from_parent)
672 : {
1426 heikki.linnakangas 673 UBC 0 : if (xlocked)
674 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
675 0 : xlocked = false;
676 0 : ReleaseBuffer(stack->buffer);
677 0 : state.stack = stack = stack->parent;
678 : }
679 :
4490 heikki.linnakangas 680 CBC 1007742 : if (XLogRecPtrIsInvalid(stack->lsn))
681 1007724 : stack->buffer = ReadBuffer(state.r, stack->blkno);
682 :
683 : /*
684 : * Be optimistic and grab shared lock first. Swap it for an exclusive
685 : * lock later if we need to update the page.
686 : */
687 1007742 : if (!xlocked)
688 : {
689 1007742 : LockBuffer(stack->buffer, GIST_SHARE);
690 1007742 : gistcheckpage(state.r, stack->buffer);
691 : }
692 :
2545 kgrittn 693 1007742 : stack->page = (Page) BufferGetPage(stack->buffer);
1916 alvherre 694 1007742 : stack->lsn = xlocked ?
695 1007742 : PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
4490 heikki.linnakangas 696 1007742 : Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));
697 :
698 : /*
699 : * If this page was split but the downlink was never inserted to the
700 : * parent because the inserting backend crashed before doing that, fix
701 : * that now.
702 : */
703 1007742 : if (GistFollowRight(stack->page))
704 : {
4490 heikki.linnakangas 705 UBC 0 : if (!xlocked)
706 : {
707 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
708 0 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
709 0 : xlocked = true;
710 : /* someone might've completed the split when we unlocked */
711 0 : if (!GistFollowRight(stack->page))
712 0 : continue;
713 : }
714 0 : gistfixsplit(&state, giststate);
715 :
716 0 : UnlockReleaseBuffer(stack->buffer);
717 0 : xlocked = false;
718 0 : state.stack = stack = stack->parent;
719 0 : continue;
720 : }
721 :
1355 heikki.linnakangas 722 CBC 1559900 : if ((stack->blkno != GIST_ROOT_BLKNO &&
723 552158 : stack->parent->lsn < GistPageGetNSN(stack->page)) ||
724 1007742 : GistPageIsDeleted(stack->page))
725 : {
726 : /*
727 : * Concurrent split or page deletion detected. There's no
728 : * guarantee that the downlink for this page is consistent with
729 : * the tuple we're inserting anymore, so go back to parent and
730 : * rechoose the best child.
731 : */
4490 heikki.linnakangas 732 UBC 0 : UnlockReleaseBuffer(stack->buffer);
733 0 : xlocked = false;
734 0 : state.stack = stack = stack->parent;
6495 teodor 735 0 : continue;
736 : }
737 :
4490 heikki.linnakangas 738 CBC 1007742 : if (!GistPageIsLeaf(stack->page))
739 : {
740 : /*
741 : * This is an internal page so continue to walk down the tree.
742 : * Find the child node that has the minimum insertion penalty.
743 : */
744 : BlockNumber childblkno;
745 : IndexTuple newtup;
746 : GISTInsertStack *item;
747 : OffsetNumber downlinkoffnum;
748 :
4286 749 552176 : downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
750 552176 : iid = PageGetItemId(stack->page, downlinkoffnum);
4490 751 552176 : idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
752 552176 : childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
753 :
754 : /*
755 : * Check that it's not a leftover invalid tuple from pre-9.1
756 : */
757 552176 : if (GistTupleIsInvalid(idxtuple))
4490 heikki.linnakangas 758 UBC 0 : ereport(ERROR,
759 : (errmsg("index \"%s\" contains an inner tuple marked as invalid",
760 : RelationGetRelationName(r)),
761 : errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
762 : errhint("Please REINDEX it.")));
763 :
764 : /*
765 : * Check that the key representing the target child node is
766 : * consistent with the key we're inserting. Update it if it's not.
767 : */
4490 heikki.linnakangas 768 CBC 552176 : newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
769 552176 : if (newtup)
770 : {
771 : /*
772 : * Swap shared lock for an exclusive one. Beware, the page may
773 : * change while we unlock/lock the page...
774 : */
775 327185 : if (!xlocked)
776 : {
777 327185 : LockBuffer(stack->buffer, GIST_UNLOCK);
778 327185 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
779 327185 : xlocked = true;
2545 kgrittn 780 327185 : stack->page = (Page) BufferGetPage(stack->buffer);
781 :
3754 alvherre 782 327185 : if (PageGetLSN(stack->page) != stack->lsn)
783 : {
784 : /* the page was changed while we unlocked it, retry */
4490 heikki.linnakangas 785 UBC 0 : continue;
786 : }
787 : }
788 :
789 : /*
790 : * Update the tuple.
791 : *
792 : * We still hold the lock after gistinserttuple(), but it
793 : * might have to split the page to make the updated tuple fit.
794 : * In that case the updated tuple might migrate to the other
795 : * half of the split, so we have to go back to the parent and
796 : * descend back to the half that's a better fit for the new
797 : * tuple.
798 : */
3985 heikki.linnakangas 799 CBC 327185 : if (gistinserttuple(&state, stack, giststate, newtup,
800 : downlinkoffnum))
801 : {
802 : /*
803 : * If this was a root split, the root page continues to be
804 : * the parent and the updated tuple went to one of the
805 : * child pages, so we just need to retry from the root
806 : * page.
807 : */
4473 808 18 : if (stack->blkno != GIST_ROOT_BLKNO)
809 : {
810 18 : UnlockReleaseBuffer(stack->buffer);
811 18 : xlocked = false;
812 18 : state.stack = stack = stack->parent;
813 : }
4490 814 18 : continue;
815 : }
816 : }
817 552158 : LockBuffer(stack->buffer, GIST_UNLOCK);
818 552158 : xlocked = false;
819 :
820 : /* descend to the chosen child */
821 552158 : item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
822 552158 : item->blkno = childblkno;
823 552158 : item->parent = stack;
4286 824 552158 : item->downlinkoffnum = downlinkoffnum;
4490 825 552158 : state.stack = stack = item;
826 : }
827 : else
828 : {
829 : /*
830 : * Leaf page. Insert the new key. We've already updated all the
831 : * parents on the way down, but we might have to split the page if
832 : * it doesn't fit. gistinserttuple() will take care of that.
833 : */
834 :
835 : /*
836 : * Swap shared lock for an exclusive one. Be careful, the page may
837 : * change while we unlock/lock the page...
838 : */
839 455566 : if (!xlocked)
840 : {
841 455566 : LockBuffer(stack->buffer, GIST_UNLOCK);
842 455566 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
843 455566 : xlocked = true;
2545 kgrittn 844 455566 : stack->page = (Page) BufferGetPage(stack->buffer);
4490 heikki.linnakangas 845 455566 : stack->lsn = PageGetLSN(stack->page);
846 :
847 455566 : if (stack->blkno == GIST_ROOT_BLKNO)
848 : {
849 : /*
850 : * the only page that can become inner instead of leaf is
851 : * the root page, so for root we should recheck it
852 : */
853 25879 : if (!GistPageIsLeaf(stack->page))
854 : {
855 : /*
856 : * very rare situation: during unlock/lock index with
857 : * number of pages = 1 was increased
858 : */
4490 heikki.linnakangas 859 UBC 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
860 0 : xlocked = false;
861 0 : continue;
862 : }
863 :
864 : /*
865 : * we don't need to check root split, because checking
866 : * leaf/inner is enough to recognize split for root
867 : */
868 : }
1355 heikki.linnakangas 869 CBC 859374 : else if ((GistFollowRight(stack->page) ||
809 870 429687 : stack->parent->lsn < GistPageGetNSN(stack->page)) ||
1355 871 429687 : GistPageIsDeleted(stack->page))
872 : {
873 : /*
874 : * The page was split or deleted while we momentarily
875 : * unlocked the page. Go back to parent.
876 : */
4490 heikki.linnakangas 877 UBC 0 : UnlockReleaseBuffer(stack->buffer);
878 0 : xlocked = false;
879 0 : state.stack = stack = stack->parent;
880 0 : continue;
881 : }
882 : }
883 :
884 : /* now state.stack->(page, buffer and blkno) points to leaf page */
885 :
3985 heikki.linnakangas 886 CBC 455566 : gistinserttuple(&state, stack, giststate, itup,
887 : InvalidOffsetNumber);
4490 888 455560 : LockBuffer(stack->buffer, GIST_UNLOCK);
889 :
890 : /* Release any pins we might still hold before exiting */
891 1463254 : for (; stack; stack = stack->parent)
892 1007694 : ReleaseBuffer(stack->buffer);
6508 teodor 893 455560 : break;
894 : }
895 : }
6495 896 455560 : }
897 :
898 : /*
899 : * Traverse the tree to find path from root page to specified "child" block.
900 : *
901 : * returns a new insertion stack, starting from the parent of "child", up
902 : * to the root. *downlinkoffnum is set to the offset of the downlink in the
903 : * direct parent of child.
904 : *
905 : * To prevent deadlocks, this should lock only one page at a time.
906 : */
static GISTInsertStack *
gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
{
	Page		page;
	Buffer		buffer;
	OffsetNumber i,
				maxoff;
	ItemId		iid;
	IndexTuple	idxtuple;
	List	   *fifo;
	GISTInsertStack *top,
			   *ptr;
	BlockNumber blkno;

	/* Start the breadth-first search at the root. */
	top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
	top->blkno = GIST_ROOT_BLKNO;
	top->downlinkoffnum = InvalidOffsetNumber;

	fifo = list_make1(top);
	while (fifo != NIL)
	{
		/* Get next page to visit */
		top = linitial(fifo);
		fifo = list_delete_first(fifo);

		/* Share lock is enough: we only read the page. */
		buffer = ReadBuffer(r, top->blkno);
		LockBuffer(buffer, GIST_SHARE);
		gistcheckpage(r, buffer);
		page = (Page) BufferGetPage(buffer);

		if (GistPageIsLeaf(page))
		{
			/*
			 * Because we scan the index top-down, all the rest of the pages
			 * in the queue must be leaf pages as well.
			 */
			UnlockReleaseBuffer(buffer);
			break;
		}

		/* currently, internal pages are never deleted */
		Assert(!GistPageIsDeleted(page));

		/*
		 * Remember this page's LSN; it is compared against the NSN of its
		 * children below to detect splits that happened after we passed by.
		 */
		top->lsn = BufferGetLSNAtomic(buffer);

		/*
		 * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
		 * downlink. This should not normally happen..
		 */
		if (GistFollowRight(page))
			elog(ERROR, "concurrent GiST page split was incomplete");

		if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
			GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
		{
			/*
			 * Page was split while we looked elsewhere. We didn't see the
			 * downlink to the right page when we scanned the parent, so add
			 * it to the queue now.
			 *
			 * Put the right page ahead of the queue, so that we visit it
			 * next. That's important, because if this is the lowest internal
			 * level, just above leaves, we might already have queued up some
			 * leaf pages, and we assume that there can't be any non-leaf
			 * pages behind leaf pages.
			 */
			ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
			ptr->blkno = GistPageGetOpaque(page)->rightlink;
			ptr->downlinkoffnum = InvalidOffsetNumber;
			ptr->parent = top->parent;

			fifo = lcons(ptr, fifo);
		}

		maxoff = PageGetMaxOffsetNumber(page);

		/* Scan all downlinks on this page, looking for 'child'. */
		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
		{
			iid = PageGetItemId(page, i);
			idxtuple = (IndexTuple) PageGetItem(page, iid);
			blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
			if (blkno == child)
			{
				/* Found it! */
				UnlockReleaseBuffer(buffer);
				*downlinkoffnum = i;
				return top;
			}
			else
			{
				/* Append this child to the list of pages to visit later */
				ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
				ptr->blkno = blkno;
				ptr->downlinkoffnum = i;
				ptr->parent = top;

				fifo = lappend(fifo, ptr);
			}
		}

		UnlockReleaseBuffer(buffer);
	}

	elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
		 RelationGetRelationName(r), child);
	return NULL;				/* keep compiler quiet */
}
1014 :
1015 : /*
1016 : * Updates the stack so that child->parent is the correct parent of the
1017 : * child. child->parent must be exclusively locked on entry, and will
1018 : * remain so at exit, but it might not be the same page anymore.
1019 : */
static void
gistFindCorrectParent(Relation r, GISTInsertStack *child)
{
	GISTInsertStack *parent = child->parent;

	gistcheckpage(r, parent->buffer);
	parent->page = (Page) BufferGetPage(parent->buffer);

	/* here we don't need to distinguish between split and page update */
	if (child->downlinkoffnum == InvalidOffsetNumber ||
		parent->lsn != PageGetLSN(parent->page))
	{
		/*
		 * The parent page has changed since we last looked at it.  Re-scan
		 * it, and if necessary its right siblings, for the child's downlink.
		 */
		OffsetNumber i,
					maxoff;
		ItemId		iid;
		IndexTuple	idxtuple;
		GISTInsertStack *ptr;

		while (true)
		{
			maxoff = PageGetMaxOffsetNumber(parent->page);
			for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
			{
				iid = PageGetItemId(parent->page, i);
				idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
				if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
				{
					/* Found the downlink. */
					child->downlinkoffnum = i;
					return;
				}
			}

			/* Not on this page; move right, releasing the old page first. */
			parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
			UnlockReleaseBuffer(parent->buffer);
			if (parent->blkno == InvalidBlockNumber)
			{
				/*
				 * Reached the end of the rightlink chain without finding the
				 * downlink.  This is a very rare case, which can happen when
				 * the root was split.
				 */
				break;
			}
			parent->buffer = ReadBuffer(r, parent->blkno);
			LockBuffer(parent->buffer, GIST_EXCLUSIVE);
			gistcheckpage(r, parent->buffer);
			parent->page = (Page) BufferGetPage(parent->buffer);
		}

		/*
		 * Last resort: search the whole tree for the parent.  Before doing
		 * so, release the pins on the old (now stale) chain of parents.
		 */

		ptr = child->parent->parent;	/* child->parent already released
										 * above */
		while (ptr)
		{
			ReleaseBuffer(ptr->buffer);
			ptr = ptr->parent;
		}

		/* ok, find new path */
		ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);

		/* read all buffers as expected by caller */
		/* note we don't lock them or gistcheckpage them here! */
		while (ptr)
		{
			ptr->buffer = ReadBuffer(r, ptr->blkno);
			ptr->page = (Page) BufferGetPage(ptr->buffer);
			ptr = ptr->parent;
		}

		/* install new chain of parents to stack */
		child->parent = parent;

		/*
		 * Recurse to re-verify the freshly found parent under an exclusive
		 * lock, as the normal path above expects.
		 */
		LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
		gistFindCorrectParent(r, child);
	}
}
1103 :
1104 : /*
1105 : * Form a downlink pointer for the page in 'buf'.
1106 : */
static IndexTuple
gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
				 GISTInsertStack *stack)
{
	Page		page = BufferGetPage(buf);
	OffsetNumber maxoff;
	OffsetNumber offset;
	IndexTuple	downlink = NULL;

	/*
	 * Build a downlink that covers every tuple on the page: start from a
	 * copy of the first tuple, then widen it with gistgetadjusted() for
	 * each subsequent tuple that it doesn't already cover.
	 */
	maxoff = PageGetMaxOffsetNumber(page);
	for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
	{
		IndexTuple	ituple = (IndexTuple)
			PageGetItem(page, PageGetItemId(page, offset));

		if (downlink == NULL)
			downlink = CopyIndexTuple(ituple);
		else
		{
			IndexTuple	newdownlink;

			newdownlink = gistgetadjusted(rel, downlink, ituple,
										  giststate);
			if (newdownlink)
				downlink = newdownlink;
		}
	}

	/*
	 * If the page is completely empty, we can't form a meaningful downlink
	 * for it. But we have to insert a downlink for the page. Any key will do,
	 * as long as its consistent with the downlink of parent page, so that we
	 * can legally insert it to the parent. A minimal one that matches as few
	 * scans as possible would be best, to keep scans from doing useless work,
	 * but we don't know how to construct that. So we just use the downlink of
	 * the original page that was split - that's as far from optimal as it can
	 * get but will do..
	 */
	if (!downlink)
	{
		ItemId		iid;

		/* Briefly lock the parent to copy the original page's downlink. */
		LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
		gistFindCorrectParent(rel, stack);
		iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
		downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
		downlink = CopyIndexTuple(downlink);
		LockBuffer(stack->parent->buffer, GIST_UNLOCK);
	}

	/* Point the downlink at this page and mark it valid. */
	ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
	GistTupleSetValid(downlink);

	return downlink;
}
1162 :
1163 :
1164 : /*
1165 : * Complete the incomplete split of state->stack->page.
1166 : */
static void
gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
{
	GISTInsertStack *stack = state->stack;
	Buffer		buf;
	Page		page;
	List	   *splitinfo = NIL;

	/*
	 * NOTE(review): the caller is expected to hold an exclusive lock on
	 * stack->buffer already — we start working on it without locking it
	 * here; confirm against the call site in gistdoinsert.
	 */
	ereport(LOG,
			(errmsg("fixing incomplete split in index \"%s\", block %u",
					RelationGetRelationName(state->r), stack->blkno)));

	Assert(GistFollowRight(stack->page));
	Assert(OffsetNumberIsValid(stack->downlinkoffnum));

	buf = stack->buffer;

	/*
	 * Read the chain of split pages, following the rightlinks. Construct a
	 * downlink tuple for each page.
	 */
	for (;;)
	{
		GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
		IndexTuple	downlink;

		page = BufferGetPage(buf);

		/* Form the new downlink tuples to insert to parent */
		downlink = gistformdownlink(state->r, buf, giststate, stack);

		si->buf = buf;
		si->downlink = downlink;

		splitinfo = lappend(splitinfo, si);

		/* F_FOLLOW_RIGHT means the right sibling also lacks a downlink. */
		if (GistFollowRight(page))
		{
			/* lock next page */
			buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
			LockBuffer(buf, GIST_EXCLUSIVE);
		}
		else
			break;
	}

	/* Insert the downlinks */
	gistfinishsplit(state, stack, giststate, splitinfo, false);
}
1216 :
1217 : /*
1218 : * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1219 : * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
1220 : * 'stack' represents the path from the root to the page being updated.
1221 : *
1222 : * The caller must hold an exclusive lock on stack->buffer. The lock is still
1223 : * held on return, but the page might not contain the inserted tuple if the
1224 : * page was split. The function returns true if the page was split, false
1225 : * otherwise.
1226 : */
1227 : static bool
3985 heikki.linnakangas 1228 CBC 782751 : gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
1229 : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
1230 : {
1231 782751 : return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
1232 : InvalidBuffer, InvalidBuffer, false, false);
1233 : }
1234 :
1235 : /* ----------------
1236 : * An extended workhorse version of gistinserttuple(). This version allows
1237 : * inserting multiple tuples, or replacing a single tuple with multiple tuples.
1238 : * This is used to recursively update the downlinks in the parent when a page
1239 : * is split.
1240 : *
1241 : * If leftchild and rightchild are valid, we're inserting/replacing the
1242 : * downlink for rightchild, and leftchild is its left sibling. We clear the
1243 : * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1244 : * insertion of the downlink.
1245 : *
1246 : * To avoid holding locks for longer than necessary, when recursing up the
1247 : * tree to update the parents, the locking is a bit peculiar here. On entry,
1248 : * the caller must hold an exclusive lock on stack->buffer, as well as
1249 : * leftchild and rightchild if given. On return:
1250 : *
1251 : * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1252 : * always kept pinned, however.
1253 : * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1254 : * is kept pinned.
1255 : * - Lock and pin on 'rightchild' are always released.
1256 : *
1257 : * Returns 'true' if the page had to be split. Note that if the page was
1258 : * split, the inserted/updated tuples might've been inserted to a right
1259 : * sibling of stack->buffer instead of stack->buffer itself.
1260 : */
static bool
gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
				 GISTSTATE *giststate,
				 IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
				 Buffer leftchild, Buffer rightchild,
				 bool unlockbuf, bool unlockleftchild)
{
	List	   *splitinfo;
	bool		is_split;

	/*
	 * Check for any rw conflicts (in serializable isolation level) just
	 * before we intend to modify the page
	 */
	CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));

	/*
	 * Insert the tuple(s) to the page, splitting the page if necessary.  If
	 * a split occurred, 'splitinfo' receives one entry per resulting page
	 * half; it is NIL otherwise.
	 */
	is_split = gistplacetopage(state->r, state->freespace, giststate,
							   stack->buffer,
							   tuples, ntup,
							   oldoffnum, NULL,
							   leftchild,
							   &splitinfo,
							   true,
							   state->heapRel,
							   state->is_build);

	/*
	 * Before recursing up in case the page was split, release locks on the
	 * child pages. We don't need to keep them locked when updating the
	 * parent.
	 */
	if (BufferIsValid(rightchild))
		UnlockReleaseBuffer(rightchild);
	if (BufferIsValid(leftchild) && unlockleftchild)
		LockBuffer(leftchild, GIST_UNLOCK);

	/*
	 * If we had to split, insert/update the downlinks in the parent. If the
	 * caller requested us to release the lock on stack->buffer, tell
	 * gistfinishsplit() to do that as soon as it's safe to do so. If we
	 * didn't have to split, release it ourselves.
	 */
	if (splitinfo)
		gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
	else if (unlockbuf)
		LockBuffer(stack->buffer, GIST_UNLOCK);

	return is_split;
}
1311 :
1312 : /*
1313 : * Finish an incomplete split by inserting/updating the downlinks in parent
1314 : * page. 'splitinfo' contains all the child pages involved in the split,
1315 : * from left-to-right.
1316 : *
1317 : * On entry, the caller must hold a lock on stack->buffer and all the child
1318 : * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1319 : * released on return. The child pages are always unlocked and unpinned.
1320 : */
static void
gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
				GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
{
	GISTPageSplitInfo *right;
	GISTPageSplitInfo *left;
	IndexTuple	tuples[2];

	/* A split always contains at least two halves */
	Assert(list_length(splitinfo) >= 2);

	/*
	 * We need to insert downlinks for each new page, and update the downlink
	 * for the original (leftmost) page in the split. Begin at the rightmost
	 * page, inserting one downlink at a time until there's only two pages
	 * left. Finally insert the downlink for the last new page and update the
	 * downlink for the original page as one operation.
	 */
	LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);

	/*
	 * Insert downlinks for the siblings from right to left, until there are
	 * only two siblings left.
	 */
	for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
	{
		right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
		left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);

		/* Re-locate the parent in case it changed since we last saw it. */
		gistFindCorrectParent(state->r, stack);
		if (gistinserttuples(state, stack->parent, giststate,
							 &right->downlink, 1,
							 InvalidOffsetNumber,
							 left->buf, right->buf, false, false))
		{
			/*
			 * If the parent page was split, the existing downlink might have
			 * moved.
			 */
			stack->downlinkoffnum = InvalidOffsetNumber;
		}
		/* gistinserttuples() released the lock on right->buf. */
	}

	right = (GISTPageSplitInfo *) lsecond(splitinfo);
	left = (GISTPageSplitInfo *) linitial(splitinfo);

	/*
	 * Finally insert downlink for the remaining right page and update the
	 * downlink for the original page to not contain the tuples that were
	 * moved to the new pages.
	 */
	tuples[0] = left->downlink;
	tuples[1] = right->downlink;
	gistFindCorrectParent(state->r, stack);
	if (gistinserttuples(state, stack->parent, giststate,
						 tuples, 2,
						 stack->downlinkoffnum,
						 left->buf, right->buf,
						 true,			/* Unlock parent */
						 unlockbuf		/* Unlock stack->buffer if caller wants
										 * that */
		))
	{
		/*
		 * If the parent page was split, the downlink might have moved.
		 */
		stack->downlinkoffnum = InvalidOffsetNumber;
	}

	Assert(left->buf == stack->buffer);

	/*
	 * If we split the page because we had to adjust the downlink on an
	 * internal page, while descending the tree for inserting a new tuple,
	 * then this might no longer be the correct page for the new tuple. The
	 * downlink to this page might not cover the new tuple anymore, it might
	 * need to go to the newly-created right sibling instead. Tell the caller
	 * to walk back up the stack, to re-check at the parent which page to
	 * insert to.
	 *
	 * Normally, the LSN-NSN interlock during the tree descend would also
	 * detect that a concurrent split happened (by ourselves), and cause us to
	 * retry at the parent. But that mechanism doesn't work during index
	 * build, because we don't do WAL-logging, and don't update LSNs, during
	 * index build.
	 */
	stack->retry_from_parent = true;
}
1410 :
1411 : /*
 * gistSplit -- split a set of index tuples into halves, filling in the
 * structs used for XLOG and for writing the actual buffers. The function
 * is recursive: it keeps splitting until the keys fit on every page.
1415 : */
1416 : SplitedPageLayout *
9722 scrappy 1417 12884 : gistSplit(Relation r,
1418 : Page page,
1419 : IndexTuple *itup, /* contains compressed entry */
1420 : int len,
1421 : GISTSTATE *giststate)
1422 : {
1423 : IndexTuple *lvectup,
1424 : *rvectup;
1425 : GistSplitVector v;
1426 : int i;
6031 bruce 1427 12884 : SplitedPageLayout *res = NULL;
1428 :
1429 : /* this should never recurse very deeply, but better safe than sorry */
3110 heikki.linnakangas 1430 12884 : check_stack_depth();
1431 :
1432 : /* there's no point in splitting an empty page */
1433 12884 : Assert(len > 0);
1434 :
1435 : /*
1436 : * If a single tuple doesn't fit on a page, no amount of splitting will
1437 : * help.
1438 : */
1439 12884 : if (len == 1)
3110 heikki.linnakangas 1440 UBC 0 : ereport(ERROR,
1441 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1442 : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
1443 : IndexTupleSize(itup[0]), GiSTPageSize,
1444 : RelationGetRelationName(r))));
1445 :
1491 akorotkov 1446 CBC 12884 : memset(v.spl_lisnull, true,
1447 12884 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
1448 12884 : memset(v.spl_risnull, true,
1449 12884 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
3710 tgl 1450 12884 : gistSplitByKey(r, page, itup, len, giststate, &v, 0);
1451 :
1452 : /* form left and right vector */
6178 teodor 1453 12884 : lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1454 12884 : rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1455 :
6129 1456 540908 : for (i = 0; i < v.splitVector.spl_nleft; i++)
1457 528024 : lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
1458 :
1459 605738 : for (i = 0; i < v.splitVector.spl_nright; i++)
1460 592854 : rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
1461 :
1462 : /* finalize splitting (may need another split) */
1463 12884 : if (!gistfitpage(rvectup, v.splitVector.spl_nright))
1464 : {
1465 292 : res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
1466 : }
1467 : else
1468 : {
6178 1469 12592 : ROTATEDIST(res);
6129 1470 12592 : res->block.num = v.splitVector.spl_nright;
6031 bruce 1471 12592 : res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
4490 heikki.linnakangas 1472 12592 : res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
1473 : }
1474 :
6129 teodor 1475 12884 : if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
1476 : {
1477 : SplitedPageLayout *resptr,
1478 : *subres;
1479 :
1480 157 : resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
1481 :
1482 : /* install on list's tail */
6031 bruce 1483 395 : while (resptr->next)
6178 teodor 1484 238 : resptr = resptr->next;
1485 :
1486 157 : resptr->next = res;
1487 157 : res = subres;
1488 : }
1489 : else
1490 : {
1491 12727 : ROTATEDIST(res);
6129 1492 12727 : res->block.num = v.splitVector.spl_nleft;
6031 bruce 1493 12727 : res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
4490 heikki.linnakangas 1494 12727 : res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
1495 : }
1496 :
6178 teodor 1497 12884 : return res;
1498 : }
1499 :
1500 : /*
1501 : * Create a GISTSTATE and fill it with information about the index
1502 : */
1503 : GISTSTATE *
4209 tgl 1504 1788 : initGISTstate(Relation index)
1505 : {
1506 : GISTSTATE *giststate;
1507 : MemoryContext scanCxt;
1508 : MemoryContext oldCxt;
1509 : int i;
1510 :
1511 : /* safety check to protect fixed-size arrays in GISTSTATE */
7901 1512 1788 : if (index->rd_att->natts > INDEX_MAX_KEYS)
7202 tgl 1513 UBC 0 : elog(ERROR, "numberOfAttributes %d > %d",
1514 : index->rd_att->natts, INDEX_MAX_KEYS);
1515 :
1516 : /* Create the memory context that will hold the GISTSTATE */
4209 tgl 1517 CBC 1788 : scanCxt = AllocSetContextCreate(CurrentMemoryContext,
1518 : "GiST scan context",
1519 : ALLOCSET_DEFAULT_SIZES);
1520 1788 : oldCxt = MemoryContextSwitchTo(scanCxt);
1521 :
1522 : /* Create and fill in the GISTSTATE */
1523 1788 : giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
1524 :
1525 1788 : giststate->scanCxt = scanCxt;
2118 1526 1788 : giststate->tempCxt = scanCxt; /* caller must change this if needed */
1491 akorotkov 1527 1788 : giststate->leafTupdesc = index->rd_att;
1528 :
1529 : /*
1530 : * The truncated tupdesc for non-leaf index tuples, which doesn't contain
1531 : * the INCLUDE attributes.
1532 : *
1533 : * It is used to form tuples during tuple adjustment and page split.
1534 : * B-tree creates shortened tuple descriptor for every truncated tuple,
1535 : * because it is doing this less often: it does not have to form truncated
1536 : * tuples during page split. Also, B-tree is not adjusting tuples on
1537 : * internal pages the way GiST does.
1538 : */
1539 1788 : giststate->nonLeafTupdesc = CreateTupleDescCopyConstr(index->rd_att);
1540 1788 : giststate->nonLeafTupdesc->natts =
1541 1788 : IndexRelationGetNumberOfKeyAttributes(index);
1542 :
1543 3736 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
1544 : {
7855 tgl 1545 1948 : fmgr_info_copy(&(giststate->consistentFn[i]),
6536 neilc 1546 1948 : index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
1547 : scanCxt);
7855 tgl 1548 1948 : fmgr_info_copy(&(giststate->unionFn[i]),
7836 bruce 1549 1948 : index_getprocinfo(index, i + 1, GIST_UNION_PROC),
1550 : scanCxt);
1551 :
1552 : /* opclasses are not required to provide a Compress method */
2028 tgl 1553 1948 : if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
1554 1448 : fmgr_info_copy(&(giststate->compressFn[i]),
1555 1448 : index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
1556 : scanCxt);
1557 : else
1558 500 : giststate->compressFn[i].fn_oid = InvalidOid;
1559 :
1560 : /* opclasses are not required to provide a Decompress method */
1561 1948 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
1562 745 : fmgr_info_copy(&(giststate->decompressFn[i]),
1563 745 : index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
1564 : scanCxt);
1565 : else
1566 1203 : giststate->decompressFn[i].fn_oid = InvalidOid;
1567 :
7855 1568 1948 : fmgr_info_copy(&(giststate->penaltyFn[i]),
7836 bruce 1569 1948 : index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
1570 : scanCxt);
7855 tgl 1571 1948 : fmgr_info_copy(&(giststate->picksplitFn[i]),
6536 neilc 1572 1948 : index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
1573 : scanCxt);
7855 tgl 1574 1948 : fmgr_info_copy(&(giststate->equalFn[i]),
7836 bruce 1575 1948 : index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
1576 : scanCxt);
1577 :
1578 : /* opclasses are not required to provide a Distance method */
4510 tgl 1579 1948 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
1580 898 : fmgr_info_copy(&(giststate->distanceFn[i]),
2118 1581 898 : index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
1582 : scanCxt);
1583 : else
4510 1584 1050 : giststate->distanceFn[i].fn_oid = InvalidOid;
1585 :
1586 : /* opclasses are not required to provide a Fetch method */
2936 heikki.linnakangas 1587 1948 : if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
1588 631 : fmgr_info_copy(&(giststate->fetchFn[i]),
2878 bruce 1589 631 : index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
1590 : scanCxt);
1591 : else
2936 heikki.linnakangas 1592 1317 : giststate->fetchFn[i].fn_oid = InvalidOid;
1593 :
1594 : /*
1595 : * If the index column has a specified collation, we should honor that
1596 : * while doing comparisons. However, we may have a collatable storage
1597 : * type for a noncollatable indexed data type. If there's no index
1598 : * collation then specify default collation in case the support
1599 : * functions need collation. This is harmless if the support
1600 : * functions don't care about collation, so we just do it
1601 : * unconditionally. (We could alternatively call get_typcollation,
1602 : * but that seems like expensive overkill --- there aren't going to be
1603 : * any cases where a GiST storage type has a nondefault collation.)
1604 : */
4370 tgl 1605 1948 : if (OidIsValid(index->rd_indcollation[i]))
1606 96 : giststate->supportCollation[i] = index->rd_indcollation[i];
1607 : else
1608 1852 : giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
1609 : }
1610 :
1611 : /* No opclass information for INCLUDE attributes */
1491 akorotkov 1612 2037 : for (; i < index->rd_att->natts; i++)
1613 : {
1614 249 : giststate->consistentFn[i].fn_oid = InvalidOid;
1615 249 : giststate->unionFn[i].fn_oid = InvalidOid;
1616 249 : giststate->compressFn[i].fn_oid = InvalidOid;
1617 249 : giststate->decompressFn[i].fn_oid = InvalidOid;
1618 249 : giststate->penaltyFn[i].fn_oid = InvalidOid;
1619 249 : giststate->picksplitFn[i].fn_oid = InvalidOid;
1620 249 : giststate->equalFn[i].fn_oid = InvalidOid;
1621 249 : giststate->distanceFn[i].fn_oid = InvalidOid;
1622 249 : giststate->fetchFn[i].fn_oid = InvalidOid;
1623 249 : giststate->supportCollation[i] = InvalidOid;
1624 : }
1625 :
4209 tgl 1626 1788 : MemoryContextSwitchTo(oldCxt);
1627 :
1628 1788 : return giststate;
1629 : }
1630 :
/*
 * freeGISTstate() -- release all memory associated with a GISTSTATE.
 *
 * Everything set up by initGISTstate (fmgr lookup data, etc.) is allocated
 * in giststate->scanCxt, so dropping that context frees it all at once.
 * NOTE(review): this presumably includes the GISTSTATE struct itself —
 * confirm against initGISTstate's allocation; callers must not touch
 * *giststate after this returns.
 */
void
freeGISTstate(GISTSTATE *giststate)
{
	/* It's sufficient to delete the scanCxt */
	MemoryContextDelete(giststate->scanCxt);
}
1637 :
1638 : /*
1639 : * gistprunepage() -- try to remove LP_DEAD items from the given page.
1640 : * Function assumes that buffer is exclusively locked.
1641 : */
1642 : static void
1496 heikki.linnakangas 1643 UBC 0 : gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
1644 : {
1645 : OffsetNumber deletable[MaxIndexTuplesPerPage];
2495 rhaas 1646 0 : int ndeletable = 0;
1647 : OffsetNumber offnum,
1648 : maxoff;
1649 :
2769 teodor 1650 0 : Assert(GistPageIsLeaf(page));
1651 :
1652 : /*
1653 : * Scan over all items to see which ones need to be deleted according to
1654 : * LP_DEAD flags.
1655 : */
1656 0 : maxoff = PageGetMaxOffsetNumber(page);
1657 0 : for (offnum = FirstOffsetNumber;
1658 0 : offnum <= maxoff;
1659 0 : offnum = OffsetNumberNext(offnum))
1660 : {
1661 0 : ItemId itemId = PageGetItemId(page, offnum);
1662 :
1663 0 : if (ItemIdIsDead(itemId))
1664 0 : deletable[ndeletable++] = offnum;
1665 : }
1666 :
1667 0 : if (ndeletable > 0)
1668 : {
143 pg 1669 UNC 0 : TransactionId snapshotConflictHorizon = InvalidTransactionId;
1670 :
803 pg 1671 UBC 0 : if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
1672 : snapshotConflictHorizon =
1673 0 : index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
1674 : deletable, ndeletable);
1675 :
2769 teodor 1676 0 : START_CRIT_SECTION();
1677 :
1678 0 : PageIndexMultiDelete(page, deletable, ndeletable);
1679 :
1680 : /*
1681 : * Mark the page as not containing any LP_DEAD items. This is not
1682 : * certainly true (there might be some that have recently been marked,
1683 : * but weren't included in our target-item list), but it will almost
1684 : * always be true and it doesn't seem worth an additional page scan to
1685 : * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
1686 : */
1687 0 : GistClearPageHasGarbage(page);
1688 :
1689 0 : MarkBufferDirty(buffer);
1690 :
1691 : /* XLOG stuff */
1692 0 : if (RelationNeedsWAL(rel))
1693 0 : {
1694 : XLogRecPtr recptr;
1695 :
1570 akorotkov 1696 0 : recptr = gistXLogDelete(buffer,
1697 : deletable, ndeletable,
1698 : snapshotConflictHorizon,
1699 : heapRel);
1700 :
2769 teodor 1701 UIC 0 : PageSetLSN(page, recptr);
2769 teodor 1702 EUB : }
1703 : else
2769 teodor 1704 UIC 0 : PageSetLSN(page, gistGetFakeLSN(rel));
2769 teodor 1705 EUB :
2769 teodor 1706 UIC 0 : END_CRIT_SECTION();
2769 teodor 1707 EUB : }
1708 :
1709 : /*
1710 : * Note: if we didn't find any LP_DEAD items, then the page's
1711 : * F_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
1712 : * separate write to clear it, however. We will clear it when we split
1713 : * the page.
1714 : */
2769 teodor 1715 UIC 0 : }
|