Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * gist.c
4 : : * interface routines for the postgres GiST index access method.
5 : : *
6 : : *
7 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : : * Portions Copyright (c) 1994, Regents of the University of California
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/gist/gist.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/gist_private.h"
18 : : #include "access/gistscan.h"
19 : : #include "access/xloginsert.h"
20 : : #include "catalog/pg_collation.h"
21 : : #include "commands/vacuum.h"
22 : : #include "miscadmin.h"
23 : : #include "nodes/execnodes.h"
24 : : #include "storage/predicate.h"
25 : : #include "utils/fmgrprotos.h"
26 : : #include "utils/index_selfuncs.h"
27 : : #include "utils/memutils.h"
28 : : #include "utils/rel.h"
29 : :
30 : : /* non-export function prototypes */
31 : : static void gistfixsplit(GISTInsertState *state, GISTSTATE *giststate);
32 : : static bool gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
33 : : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum);
34 : : static bool gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
35 : : GISTSTATE *giststate,
36 : : IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
37 : : Buffer leftchild, Buffer rightchild,
38 : : bool unlockbuf, bool unlockleftchild);
39 : : static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
40 : : GISTSTATE *giststate, List *splitinfo, bool unlockbuf);
41 : : static void gistprunepage(Relation rel, Page page, Buffer buffer,
42 : : Relation heapRel);
43 : :
44 : :
45 : : #define ROTATEDIST(d) do { \
46 : : SplitPageLayout *tmp=(SplitPageLayout*)palloc0(sizeof(SplitPageLayout)); \
47 : : tmp->block.blkno = InvalidBlockNumber; \
48 : : tmp->buffer = InvalidBuffer; \
49 : : tmp->next = (d); \
50 : : (d)=tmp; \
51 : : } while(0)
52 : :
53 : :
54 : : /*
55 : : * GiST handler function: return IndexAmRoutine with access method parameters
56 : : * and callbacks.
57 : : */
58 : : Datum
3010 tgl@sss.pgh.pa.us 59 :CBC 3318 : gisthandler(PG_FUNCTION_ARGS)
60 : : {
61 : 3318 : IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
62 : :
63 : 3318 : amroutine->amstrategies = 0;
2908 teodor@sigaev.ru 64 : 3318 : amroutine->amsupport = GISTNProcs;
1476 akorotkov@postgresql 65 : 3318 : amroutine->amoptsprocnum = GIST_OPTIONS_PROC;
3010 tgl@sss.pgh.pa.us 66 : 3318 : amroutine->amcanorder = false;
67 : 3318 : amroutine->amcanorderbyop = true;
68 : 3318 : amroutine->amcanbackward = false;
69 : 3318 : amroutine->amcanunique = false;
70 : 3318 : amroutine->amcanmulticol = true;
71 : 3318 : amroutine->amoptionalkey = true;
72 : 3318 : amroutine->amsearcharray = false;
73 : 3318 : amroutine->amsearchnulls = true;
74 : 3318 : amroutine->amstorage = true;
75 : 3318 : amroutine->amclusterable = true;
2210 teodor@sigaev.ru 76 : 3318 : amroutine->ampredlocks = true;
2615 rhaas@postgresql.org 77 : 3318 : amroutine->amcanparallel = false;
128 tomas.vondra@postgre 78 :GNC 3318 : amroutine->amcanbuildparallel = false;
1862 akorotkov@postgresql 79 :CBC 3318 : amroutine->amcaninclude = true;
1551 akapila@postgresql.o 80 : 3318 : amroutine->amusemaintenanceworkmem = false;
391 tomas.vondra@postgre 81 : 3318 : amroutine->amsummarizing = false;
1551 akapila@postgresql.o 82 : 3318 : amroutine->amparallelvacuumoptions =
83 : : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
3010 tgl@sss.pgh.pa.us 84 : 3318 : amroutine->amkeytype = InvalidOid;
85 : :
86 : 3318 : amroutine->ambuild = gistbuild;
87 : 3318 : amroutine->ambuildempty = gistbuildempty;
88 : 3318 : amroutine->aminsert = gistinsert;
141 tomas.vondra@postgre 89 :GNC 3318 : amroutine->aminsertcleanup = NULL;
3010 tgl@sss.pgh.pa.us 90 :CBC 3318 : amroutine->ambulkdelete = gistbulkdelete;
91 : 3318 : amroutine->amvacuumcleanup = gistvacuumcleanup;
92 : 3318 : amroutine->amcanreturn = gistcanreturn;
93 : 3318 : amroutine->amcostestimate = gistcostestimate;
94 : 3318 : amroutine->amoptions = gistoptions;
2801 95 : 3318 : amroutine->amproperty = gistproperty;
1839 alvherre@alvh.no-ip. 96 : 3318 : amroutine->ambuildphasename = NULL;
3010 tgl@sss.pgh.pa.us 97 : 3318 : amroutine->amvalidate = gistvalidate;
1352 98 : 3318 : amroutine->amadjustmembers = gistadjustmembers;
3010 99 : 3318 : amroutine->ambeginscan = gistbeginscan;
100 : 3318 : amroutine->amrescan = gistrescan;
101 : 3318 : amroutine->amgettuple = gistgettuple;
102 : 3318 : amroutine->amgetbitmap = gistgetbitmap;
103 : 3318 : amroutine->amendscan = gistendscan;
104 : 3318 : amroutine->ammarkpos = NULL;
105 : 3318 : amroutine->amrestrpos = NULL;
2637 rhaas@postgresql.org 106 : 3318 : amroutine->amestimateparallelscan = NULL;
107 : 3318 : amroutine->aminitparallelscan = NULL;
108 : 3318 : amroutine->amparallelrescan = NULL;
109 : :
3010 tgl@sss.pgh.pa.us 110 : 3318 : PG_RETURN_POINTER(amroutine);
111 : : }
112 : :
113 : : /*
114 : : * Create and return a temporary memory context for use by GiST. We
115 : : * _always_ invoke user-provided methods in a temporary memory
116 : : * context, so that memory leaks in those functions cannot cause
117 : : * problems. Also, we use some additional temporary contexts in the
118 : : * GiST code itself, to avoid the need to do some awkward manual
119 : : * memory management.
120 : : */
121 : : MemoryContext
6779 bruce@momjian.us 122 : 3736 : createTempGistContext(void)
123 : : {
124 : 3736 : return AllocSetContextCreate(CurrentMemoryContext,
125 : : "GiST temporary context",
126 : : ALLOCSET_DEFAULT_SIZES);
127 : : }
128 : :
129 : : /*
130 : : * gistbuildempty() -- build an empty gist index in the initialization fork
131 : : */
132 : : void
3010 tgl@sss.pgh.pa.us 133 : 5 : gistbuildempty(Relation index)
134 : : {
135 : : Buffer buffer;
136 : :
137 : : /* Initialize the root page */
235 tmunro@postgresql.or 138 : 5 : buffer = ExtendBufferedRel(BMR_REL(index), INIT_FORKNUM, NULL,
139 : : EB_SKIP_EXTENSION_LOCK | EB_LOCK_FIRST);
140 : :
141 : : /* Initialize and xlog buffer */
4080 heikki.linnakangas@i 142 : 5 : START_CRIT_SECTION();
143 : 5 : GISTInitBuffer(buffer, F_LEAF);
144 : 5 : MarkBufferDirty(buffer);
3784 145 : 5 : log_newpage_buffer(buffer, true);
4080 146 [ - + ]: 5 : END_CRIT_SECTION();
147 : :
148 : : /* Unlock and release the buffer */
149 : 5 : UnlockReleaseBuffer(buffer);
4855 rhaas@postgresql.org 150 : 5 : }
151 : :
152 : : /*
153 : : * gistinsert -- wrapper for GiST tuple insertion.
154 : : *
155 : : * This is the public interface routine for tuple insertion in GiSTs.
156 : : * It doesn't do any work; just locks the relation and passes the buck.
157 : : */
158 : : bool
3010 tgl@sss.pgh.pa.us 159 : 151783 : gistinsert(Relation r, Datum *values, bool *isnull,
160 : : ItemPointer ht_ctid, Relation heapRel,
161 : : IndexUniqueCheck checkUnique,
162 : : bool indexUnchanged,
163 : : IndexInfo *indexInfo)
164 : : {
2621 165 : 151783 : GISTSTATE *giststate = (GISTSTATE *) indexInfo->ii_AmCache;
166 : : IndexTuple itup;
167 : : MemoryContext oldCxt;
168 : :
169 : : /* Initialize GISTSTATE cache if first call in this statement */
170 [ + + ]: 151783 : if (giststate == NULL)
171 : : {
172 : 600 : oldCxt = MemoryContextSwitchTo(indexInfo->ii_Context);
173 : 600 : giststate = initGISTstate(r);
174 : 600 : giststate->tempCxt = createTempGistContext();
175 : 600 : indexInfo->ii_AmCache = (void *) giststate;
176 : 600 : MemoryContextSwitchTo(oldCxt);
177 : : }
178 : :
4580 179 : 151783 : oldCxt = MemoryContextSwitchTo(giststate->tempCxt);
180 : :
181 : 151783 : itup = gistFormTuple(giststate, r,
182 : : values, isnull, true /* size is currently bogus */ );
9716 bruce@momjian.us 183 : 151782 : itup->t_tid = *ht_ctid;
184 : :
1838 heikki.linnakangas@i 185 : 151782 : gistdoinsert(r, itup, 0, giststate, heapRel, false);
186 : :
187 : : /* cleanup */
4580 tgl@sss.pgh.pa.us 188 : 151776 : MemoryContextSwitchTo(oldCxt);
2621 189 : 151776 : MemoryContextReset(giststate->tempCxt);
190 : :
3010 191 : 151776 : return false;
192 : : }
193 : :
194 : :
195 : : /*
196 : : * Place tuples from 'itup' to 'buffer'. If 'oldoffnum' is valid, the tuple
197 : : * at that offset is atomically removed along with inserting the new tuples.
198 : : * This is used to replace a tuple with a new one.
199 : : *
200 : : * If 'leftchildbuf' is valid, we're inserting the downlink for the page
201 : : * to the right of 'leftchildbuf', or updating the downlink for 'leftchildbuf'.
202 : : * F_FOLLOW_RIGHT flag on 'leftchildbuf' is cleared and NSN is set.
203 : : *
204 : : * If 'markfollowright' is true and the page is split, the left child is
205 : : * marked with F_FOLLOW_RIGHT flag. That is the normal case. During buffered
206 : : * index build, however, there is no concurrent access and the page splitting
207 : : * is done in a slightly simpler fashion, and false is passed.
208 : : *
209 : : * If there is not enough room on the page, it is split. All the split
210 : : * pages are kept pinned and locked and returned in *splitinfo, the caller
211 : : * is responsible for inserting the downlinks for them. However, if
212 : : * 'buffer' is the root page and it needs to be split, gistplacetopage()
213 : : * performs the split as one atomic operation, and *splitinfo is set to NIL.
214 : : * In that case, we continue to hold the root page locked, and the child
215 : : * pages are released; note that new tuple(s) are *not* on the root page
216 : : * but in one of the new child pages.
217 : : *
218 : : * If 'newblkno' is not NULL, returns the block number of page the first
219 : : * new/updated tuple was inserted to. Usually it's the given page, but could
220 : : * be its right sibling if the page was split.
221 : : *
222 : : * Returns 'true' if the page was split, 'false' otherwise.
223 : : */
224 : : bool
4602 heikki.linnakangas@i 225 : 859502 : gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
226 : : Buffer buffer,
227 : : IndexTuple *itup, int ntup, OffsetNumber oldoffnum,
228 : : BlockNumber *newblkno,
229 : : Buffer leftchildbuf,
230 : : List **splitinfo,
231 : : bool markfollowright,
232 : : Relation heapRel,
233 : : bool is_build)
234 : : {
4259 235 : 859502 : BlockNumber blkno = BufferGetBlockNumber(buffer);
2916 kgrittn@postgresql.o 236 : 859502 : Page page = BufferGetPage(buffer);
4861 heikki.linnakangas@i 237 : 859502 : bool is_leaf = (GistPageIsLeaf(page)) ? true : false;
238 : : XLogRecPtr recptr;
239 : : bool is_split;
240 : :
241 : : /*
242 : : * Refuse to modify a page that's incompletely split. This should not
243 : : * happen because we finish any incomplete splits while we walk down the
244 : : * tree. However, it's remotely possible that another concurrent inserter
245 : : * splits a parent page, and errors out before completing the split. We
246 : : * will just throw an error in that case, and leave any split we had in
247 : : * progress unfinished too. The next insert that comes along will clean up
248 : : * the mess.
249 : : */
250 [ - + ]: 859502 : if (GistFollowRight(page))
4861 heikki.linnakangas@i 251 [ # # ]:UBC 0 : elog(ERROR, "concurrent GiST page split was incomplete");
252 : :
253 : : /* should never try to insert to a deleted page */
1180 heikki.linnakangas@i 254 [ - + ]:CBC 859502 : Assert(!GistPageIsDeleted(page));
255 : :
4861 256 : 859502 : *splitinfo = NIL;
257 : :
258 : : /*
259 : : * if isupdate, remove old key: This node's key has been modified, either
260 : : * because a child split occurred or because we needed to adjust our key
261 : : * for an insert in a child node. Therefore, remove the old version of
262 : : * this node's key.
263 : : *
264 : : * for WAL replay, in the non-split case we handle this by setting up a
265 : : * one-element todelete array; in the split case, it's handled implicitly
266 : : * because the tuple vector passed to gistSplit won't include this tuple.
267 : : */
4602 268 : 859502 : is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
269 : :
270 : : /*
271 : : * If leaf page is full, try at first to delete dead tuples. And then
272 : : * check again.
273 : : */
3140 teodor@sigaev.ru 274 [ + + + + : 859502 : if (is_split && GistPageIsLeaf(page) && GistPageHasGarbage(page))
- + ]
275 : : {
1867 heikki.linnakangas@i 276 :UBC 0 : gistprunepage(rel, page, buffer, heapRel);
3140 teodor@sigaev.ru 277 : 0 : is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
278 : : }
279 : :
4861 heikki.linnakangas@i 280 [ + + ]:CBC 859502 : if (is_split)
281 : : {
282 : : /* no space for insertion */
283 : : IndexTuple *itvec;
284 : : int tlen;
103 rhaas@postgresql.org 285 :GNC 12689 : SplitPageLayout *dist = NULL,
286 : : *ptr;
4861 heikki.linnakangas@i 287 :CBC 12689 : BlockNumber oldrlink = InvalidBlockNumber;
4312 288 : 12689 : GistNSN oldnsn = 0;
289 : : SplitPageLayout rootpg;
290 : : bool is_rootsplit;
291 : : int npage;
292 : :
4861 293 : 12689 : is_rootsplit = (blkno == GIST_ROOT_BLKNO);
294 : :
295 : : /*
296 : : * Form index tuples vector to split. If we're replacing an old tuple,
297 : : * remove the old version from the vector.
298 : : */
299 : 12689 : itvec = gistextractpage(page, &tlen);
300 [ + + + - : 12689 : if (OffsetNumberIsValid(oldoffnum))
+ + ]
301 : : {
302 : : /* on inner page we should remove old tuple */
303 : 2666 : int pos = oldoffnum - FirstOffsetNumber;
304 : :
6402 bruce@momjian.us 305 : 2666 : tlen--;
306 [ + + ]: 2666 : if (pos != tlen)
307 : 1701 : memmove(itvec + pos, itvec + pos + 1, sizeof(IndexTuple) * (tlen - pos));
308 : : }
4861 heikki.linnakangas@i 309 : 12689 : itvec = gistjoinvector(itvec, &tlen, itup, ntup);
4602 310 : 12689 : dist = gistSplit(rel, page, itvec, tlen, giststate);
311 : :
312 : : /*
313 : : * Check that split didn't produce too many pages.
314 : : */
3664 315 : 12689 : npage = 0;
316 [ + + ]: 38189 : for (ptr = dist; ptr; ptr = ptr->next)
317 : 25500 : npage++;
318 : : /* in a root split, we'll add one more page to the list below */
319 [ + + ]: 12689 : if (is_rootsplit)
320 : 204 : npage++;
321 [ - + ]: 12689 : if (npage > GIST_MAX_SPLIT_PAGES)
3664 heikki.linnakangas@i 322 [ # # ]:UBC 0 : elog(ERROR, "GiST page split into too many halves (%d, maximum %d)",
323 : : npage, GIST_MAX_SPLIT_PAGES);
324 : :
325 : : /*
326 : : * Set up pages to work with. Allocate new buffers for all but the
327 : : * leftmost page. The original page becomes the new leftmost page, and
328 : : * is just replaced with the new contents.
329 : : *
330 : : * For a root-split, allocate new buffers for all child pages, the
331 : : * original page is overwritten with new root page containing
332 : : * downlinks to the new child pages.
333 : : */
4861 heikki.linnakangas@i 334 :CBC 12689 : ptr = dist;
335 [ + + ]: 12689 : if (!is_rootsplit)
336 : : {
337 : : /* save old rightlink and NSN */
338 : 12485 : oldrlink = GistPageGetOpaque(page)->rightlink;
4105 339 : 12485 : oldnsn = GistPageGetNSN(page);
340 : :
4861 341 : 12485 : dist->buffer = buffer;
342 : 12485 : dist->block.blkno = BufferGetBlockNumber(buffer);
2916 kgrittn@postgresql.o 343 : 12485 : dist->page = PageGetTempPageCopySpecial(BufferGetPage(buffer));
344 : :
345 : : /* clean all flags except F_LEAF */
6549 teodor@sigaev.ru 346 : 12485 : GistPageGetOpaque(dist->page)->flags = (is_leaf) ? F_LEAF : 0;
347 : :
4861 heikki.linnakangas@i 348 : 12485 : ptr = ptr->next;
349 : : }
350 [ + + ]: 25704 : for (; ptr; ptr = ptr->next)
351 : : {
352 : : /* Allocate new page */
379 andres@anarazel.de 353 : 13015 : ptr->buffer = gistNewBuffer(rel, heapRel);
4861 heikki.linnakangas@i 354 : 13015 : GISTInitBuffer(ptr->buffer, (is_leaf) ? F_LEAF : 0);
2916 kgrittn@postgresql.o 355 : 13015 : ptr->page = BufferGetPage(ptr->buffer);
4861 heikki.linnakangas@i 356 : 13015 : ptr->block.blkno = BufferGetBlockNumber(ptr->buffer);
2210 teodor@sigaev.ru 357 : 13015 : PredicateLockPageSplit(rel,
358 : : BufferGetBlockNumber(buffer),
359 : : BufferGetBlockNumber(ptr->buffer));
360 : : }
361 : :
362 : : /*
363 : : * Now that we know which blocks the new pages go to, set up downlink
364 : : * tuples to point to them.
365 : : */
6402 bruce@momjian.us 366 [ + + ]: 38189 : for (ptr = dist; ptr; ptr = ptr->next)
367 : : {
4861 heikki.linnakangas@i 368 : 25500 : ItemPointerSetBlockNumber(&(ptr->itup->t_tid), ptr->block.blkno);
369 : 25500 : GistTupleSetValid(ptr->itup);
370 : : }
371 : :
372 : : /*
373 : : * If this is a root split, we construct the new root page with the
374 : : * downlinks here directly, instead of requiring the caller to insert
375 : : * them. Add the new root page to the list along with the child pages.
376 : : */
377 [ + + ]: 12689 : if (is_rootsplit)
378 : : {
379 : : IndexTuple *downlinks;
4753 bruce@momjian.us 380 : 204 : int ndownlinks = 0;
381 : : int i;
382 : :
4861 heikki.linnakangas@i 383 : 204 : rootpg.buffer = buffer;
2916 kgrittn@postgresql.o 384 : 204 : rootpg.page = PageGetTempPageCopySpecial(BufferGetPage(rootpg.buffer));
4861 heikki.linnakangas@i 385 : 204 : GistPageGetOpaque(rootpg.page)->flags = 0;
386 : :
387 : : /* Prepare a vector of all the downlinks */
388 [ + + ]: 613 : for (ptr = dist; ptr; ptr = ptr->next)
389 : 409 : ndownlinks++;
390 : 204 : downlinks = palloc(sizeof(IndexTuple) * ndownlinks);
391 [ + + ]: 613 : for (i = 0, ptr = dist; ptr; ptr = ptr->next)
392 : 409 : downlinks[i++] = ptr->itup;
393 : :
394 : 204 : rootpg.block.blkno = GIST_ROOT_BLKNO;
395 : 204 : rootpg.block.num = ndownlinks;
396 : 204 : rootpg.list = gistfillitupvec(downlinks, ndownlinks,
397 : : &(rootpg.lenlist));
398 : 204 : rootpg.itup = NULL;
399 : :
400 : 204 : rootpg.next = dist;
401 : 204 : dist = &rootpg;
402 : : }
403 : : else
404 : : {
405 : : /* Prepare split-info to be returned to caller */
406 [ + + ]: 37576 : for (ptr = dist; ptr; ptr = ptr->next)
407 : : {
408 : 25091 : GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
409 : :
410 : 25091 : si->buf = ptr->buffer;
411 : 25091 : si->downlink = ptr->itup;
412 : 25091 : *splitinfo = lappend(*splitinfo, si);
413 : : }
414 : : }
415 : :
416 : : /*
417 : : * Fill all pages. All the pages are new, ie. freshly allocated empty
418 : : * pages, or a temporary copy of the old page.
419 : : */
420 [ + + ]: 38393 : for (ptr = dist; ptr; ptr = ptr->next)
421 : : {
4753 bruce@momjian.us 422 : 25704 : char *data = (char *) (ptr->list);
423 : :
599 drowley@postgresql.o 424 [ + + ]: 938168 : for (int i = 0; i < ptr->block.num; i++)
425 : : {
4259 heikki.linnakangas@i 426 : 912464 : IndexTuple thistup = (IndexTuple) data;
427 : :
428 [ - + ]: 912464 : if (PageAddItem(ptr->page, (Item) data, IndexTupleSize(thistup), i + FirstOffsetNumber, false, false) == InvalidOffsetNumber)
4602 heikki.linnakangas@i 429 [ # # ]:UBC 0 : elog(ERROR, "failed to add item to index page in \"%s\"", RelationGetRelationName(rel));
430 : :
431 : : /*
432 : : * If this is the first inserted/updated tuple, let the caller
433 : : * know which page it landed on.
434 : : */
4259 heikki.linnakangas@i 435 [ + + + + ]:CBC 912464 : if (newblkno && ItemPointerEquals(&thistup->t_tid, &(*itup)->t_tid))
436 : 387 : *newblkno = ptr->block.blkno;
437 : :
438 : 912464 : data += IndexTupleSize(thistup);
439 : : }
440 : :
441 : : /* Set up rightlinks */
4861 442 [ + + + + ]: 25704 : if (ptr->next && ptr->block.blkno != GIST_ROOT_BLKNO)
443 : 25622 : GistPageGetOpaque(ptr->page)->rightlink =
444 : 12811 : ptr->next->block.blkno;
445 : : else
446 : 12893 : GistPageGetOpaque(ptr->page)->rightlink = oldrlink;
447 : :
448 : : /*
449 : : * Mark the all but the right-most page with the follow-right
450 : : * flag. It will be cleared as soon as the downlink is inserted
451 : : * into the parent, but this ensures that if we error out before
452 : : * that, the index is still consistent. (in buffering build mode,
453 : : * any error will abort the index build anyway, so this is not
454 : : * needed.)
455 : : */
4602 456 [ + + + + : 25704 : if (ptr->next && !is_rootsplit && markfollowright)
+ + ]
4861 457 : 12222 : GistMarkFollowRight(ptr->page);
458 : : else
459 : 13482 : GistClearFollowRight(ptr->page);
460 : :
461 : : /*
462 : : * Copy the NSN of the original page to all pages. The
463 : : * F_FOLLOW_RIGHT flags ensure that scans will follow the
464 : : * rightlinks until the downlinks are inserted.
465 : : */
4105 466 : 25704 : GistPageSetNSN(ptr->page, oldnsn);
467 : : }
468 : :
469 : : /*
470 : : * gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
471 : : * insertion for that. NB: The number of pages and data segments
472 : : * specified here must match the calculations in gistXLogSplit()!
473 : : */
1838 474 [ + + + + : 12689 : if (!is_build && RelationNeedsWAL(rel))
+ + + - +
- ]
3433 475 : 1737 : XLogEnsureRecordSpace(npage, 1 + npage * 2);
476 : :
6549 teodor@sigaev.ru 477 : 12689 : START_CRIT_SECTION();
478 : :
479 : : /*
480 : : * Must mark buffers dirty before XLogInsert, even though we'll still
481 : : * be changing their opaque fields below.
482 : : */
6402 bruce@momjian.us 483 [ + + ]: 38393 : for (ptr = dist; ptr; ptr = ptr->next)
6589 tgl@sss.pgh.pa.us 484 : 25704 : MarkBufferDirty(ptr->buffer);
4861 heikki.linnakangas@i 485 [ + + ]: 12689 : if (BufferIsValid(leftchildbuf))
486 : 2580 : MarkBufferDirty(leftchildbuf);
487 : :
488 : : /*
489 : : * The first page in the chain was a temporary working copy meant to
490 : : * replace the old page. Copy it over the old page.
491 : : */
2916 kgrittn@postgresql.o 492 : 12689 : PageRestoreTempPage(dist->page, BufferGetPage(dist->buffer));
493 : 12689 : dist->page = BufferGetPage(dist->buffer);
494 : :
495 : : /*
496 : : * Write the WAL record.
497 : : *
498 : : * If we're building a new index, however, we don't WAL-log changes
499 : : * yet. The LSN-NSN interlock between parent and child requires that
500 : : * LSNs never move backwards, so set the LSNs to a value that's
501 : : * smaller than any real or fake unlogged LSN that might be generated
502 : : * later. (There can't be any concurrent scans during index build, so
503 : : * we don't need to be able to detect concurrent splits yet.)
504 : : */
1838 heikki.linnakangas@i 505 [ + + ]: 12689 : if (is_build)
506 : 10949 : recptr = GistBuildLSN;
507 : : else
508 : : {
509 [ + + + + : 1740 : if (RelationNeedsWAL(rel))
+ - + - ]
510 : 1737 : recptr = gistXLogSplit(is_leaf,
511 : : dist, oldrlink, oldnsn, leftchildbuf,
512 : : markfollowright);
513 : : else
514 : 3 : recptr = gistGetFakeLSN(rel);
515 : : }
516 : :
6402 bruce@momjian.us 517 [ + + ]: 38393 : for (ptr = dist; ptr; ptr = ptr->next)
4861 heikki.linnakangas@i 518 : 25704 : PageSetLSN(ptr->page, recptr);
519 : :
520 : : /*
521 : : * Return the new child buffers to the caller.
522 : : *
523 : : * If this was a root split, we've already inserted the downlink
524 : : * pointers, in the form of a new root page. Therefore we can release
525 : : * all the new buffers, and keep just the root page locked.
526 : : */
527 [ + + ]: 12689 : if (is_rootsplit)
528 : : {
529 [ + + ]: 613 : for (ptr = dist->next; ptr; ptr = ptr->next)
530 : 409 : UnlockReleaseBuffer(ptr->buffer);
531 : : }
532 : : }
533 : : else
534 : : {
535 : : /*
536 : : * Enough space. We always get here if ntup==0.
537 : : */
6549 teodor@sigaev.ru 538 : 846813 : START_CRIT_SECTION();
539 : :
540 : : /*
541 : : * Delete old tuple if any, then insert new tuple(s) if any. If
542 : : * possible, use the fast path of PageIndexTupleOverwrite.
543 : : */
4861 heikki.linnakangas@i 544 [ + + + - : 846813 : if (OffsetNumberIsValid(oldoffnum))
+ + ]
545 : : {
2774 tgl@sss.pgh.pa.us 546 [ + + ]: 375960 : if (ntup == 1)
547 : : {
548 : : /* One-for-one replacement, so use PageIndexTupleOverwrite */
549 [ - + ]: 366063 : if (!PageIndexTupleOverwrite(page, oldoffnum, (Item) *itup,
550 : 366063 : IndexTupleSize(*itup)))
2774 tgl@sss.pgh.pa.us 551 [ # # ]:UBC 0 : elog(ERROR, "failed to add item to index page in \"%s\"",
552 : : RelationGetRelationName(rel));
553 : : }
554 : : else
555 : : {
556 : : /* Delete old, then append new tuple(s) to page */
2774 tgl@sss.pgh.pa.us 557 :CBC 9897 : PageIndexTupleDelete(page, oldoffnum);
558 : 9897 : gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
559 : : }
560 : : }
561 : : else
562 : : {
563 : : /* Just append new tuples at the end of the page */
564 : 470853 : gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
565 : : }
566 : :
4861 heikki.linnakangas@i 567 : 846813 : MarkBufferDirty(buffer);
568 : :
569 [ + + ]: 846813 : if (BufferIsValid(leftchildbuf))
570 : 9642 : MarkBufferDirty(leftchildbuf);
571 : :
1838 572 [ + + ]: 846813 : if (is_build)
573 : 599875 : recptr = GistBuildLSN;
574 : : else
575 : : {
576 [ + + + + : 246938 : if (RelationNeedsWAL(rel))
+ - + - ]
6779 bruce@momjian.us 577 : 246899 : {
1838 heikki.linnakangas@i 578 : 246899 : OffsetNumber ndeloffs = 0,
579 : : deloffs[1];
580 : :
581 [ + + + - : 246899 : if (OffsetNumberIsValid(oldoffnum))
+ + ]
582 : : {
583 : 96890 : deloffs[0] = oldoffnum;
584 : 96890 : ndeloffs = 1;
585 : : }
586 : :
587 : 246899 : recptr = gistXLogUpdate(buffer,
588 : : deloffs, ndeloffs, itup, ntup,
589 : : leftchildbuf);
590 : : }
591 : : else
592 : 39 : recptr = gistGetFakeLSN(rel);
593 : : }
594 : 846813 : PageSetLSN(page, recptr);
595 : :
4259 596 [ + + ]: 846813 : if (newblkno)
597 : 52185 : *newblkno = blkno;
598 : : }
599 : :
600 : : /*
601 : : * If we inserted the downlink for a child page, set NSN and clear
602 : : * F_FOLLOW_RIGHT flag on the left child, so that concurrent scans know to
603 : : * follow the rightlink if and only if they looked at the parent page
604 : : * before we inserted the downlink.
605 : : *
606 : : * Note that we do this *after* writing the WAL record. That means that
607 : : * the possible full page image in the WAL record does not include these
608 : : * changes, and they must be replayed even if the page is restored from
609 : : * the full page image. There's a chicken-and-egg problem: if we updated
610 : : * the child pages first, we wouldn't know the recptr of the WAL record
611 : : * we're about to write.
612 : : */
4861 613 [ + + ]: 859502 : if (BufferIsValid(leftchildbuf))
614 : : {
2916 kgrittn@postgresql.o 615 : 12222 : Page leftpg = BufferGetPage(leftchildbuf);
616 : :
4105 heikki.linnakangas@i 617 : 12222 : GistPageSetNSN(leftpg, recptr);
4861 618 : 12222 : GistClearFollowRight(leftpg);
619 : :
620 : 12222 : PageSetLSN(leftpg, recptr);
621 : : }
622 : :
623 [ - + ]: 859502 : END_CRIT_SECTION();
624 : :
625 : 859502 : return is_split;
626 : : }
627 : :
628 : : /*
629 : : * Workhouse routine for doing insertion into a GiST index. Note that
630 : : * this routine assumes it is invoked in a short-lived memory context,
631 : : * so it does not bother releasing palloc'd allocations.
632 : : */
633 : : void
1941 akorotkov@postgresql 634 : 463046 : gistdoinsert(Relation r, IndexTuple itup, Size freespace,
635 : : GISTSTATE *giststate, Relation heapRel, bool is_build)
636 : : {
637 : : ItemId iid;
638 : : IndexTuple idxtuple;
639 : : GISTInsertStack firststack;
640 : : GISTInsertStack *stack;
641 : : GISTInsertState state;
4861 heikki.linnakangas@i 642 : 463046 : bool xlocked = false;
643 : :
644 : 463046 : memset(&state, 0, sizeof(GISTInsertState));
645 : 463046 : state.freespace = freespace;
646 : 463046 : state.r = r;
1941 akorotkov@postgresql 647 : 463046 : state.heapRel = heapRel;
1838 heikki.linnakangas@i 648 : 463046 : state.is_build = is_build;
649 : :
650 : : /* Start from the root */
4861 651 : 463046 : firststack.blkno = GIST_ROOT_BLKNO;
4312 652 : 463046 : firststack.lsn = 0;
1797 653 : 463046 : firststack.retry_from_parent = false;
4861 654 : 463046 : firststack.parent = NULL;
4657 655 : 463046 : firststack.downlinkoffnum = InvalidOffsetNumber;
4861 656 : 463046 : state.stack = stack = &firststack;
657 : :
658 : : /*
659 : : * Walk down along the path of smallest penalty, updating the parent
660 : : * pointers with the key we're inserting as we go. If we crash in the
661 : : * middle, the tree is consistent, although the possible parent updates
662 : : * were a waste.
663 : : */
664 : : for (;;)
665 : : {
666 : : /*
667 : : * If we split an internal page while descending the tree, we have to
668 : : * retry at the parent. (Normally, the LSN-NSN interlock below would
669 : : * also catch this and cause us to retry. But LSNs are not updated
670 : : * during index build.)
671 : : */
1797 672 [ + + ]: 1050628 : while (stack->retry_from_parent)
673 : : {
674 [ - + ]: 150 : if (xlocked)
1797 heikki.linnakangas@i 675 :UBC 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
1797 heikki.linnakangas@i 676 :CBC 150 : xlocked = false;
677 : 150 : ReleaseBuffer(stack->buffer);
678 : 150 : state.stack = stack = stack->parent;
679 : : }
680 : :
4861 681 [ + + ]: 1050478 : if (XLogRecPtrIsInvalid(stack->lsn))
682 : 1050400 : stack->buffer = ReadBuffer(state.r, stack->blkno);
683 : :
684 : : /*
685 : : * Be optimistic and grab shared lock first. Swap it for an exclusive
686 : : * lock later if we need to update the page.
687 : : */
688 [ + + ]: 1050478 : if (!xlocked)
689 : : {
690 : 1050477 : LockBuffer(stack->buffer, GIST_SHARE);
691 : 1050477 : gistcheckpage(state.r, stack->buffer);
692 : : }
693 : :
2916 kgrittn@postgresql.o 694 : 1050478 : stack->page = (Page) BufferGetPage(stack->buffer);
2287 alvherre@alvh.no-ip. 695 : 1050478 : stack->lsn = xlocked ?
696 [ + + ]: 1050478 : PageGetLSN(stack->page) : BufferGetLSNAtomic(stack->buffer);
4861 heikki.linnakangas@i 697 [ + + + + : 1050478 : Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn));
+ + + + -
+ ]
698 : :
699 : : /*
700 : : * If this page was split but the downlink was never inserted to the
701 : : * parent because the inserting backend crashed before doing that, fix
702 : : * that now.
703 : : */
704 [ - + ]: 1050478 : if (GistFollowRight(stack->page))
705 : : {
4861 heikki.linnakangas@i 706 [ # # ]:UBC 0 : if (!xlocked)
707 : : {
708 : 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
709 : 0 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
710 : 0 : xlocked = true;
711 : : /* someone might've completed the split when we unlocked */
712 [ # # ]: 0 : if (!GistFollowRight(stack->page))
713 : 0 : continue;
714 : : }
715 : 0 : gistfixsplit(&state, giststate);
716 : :
717 : 0 : UnlockReleaseBuffer(stack->buffer);
718 : 0 : xlocked = false;
719 : 0 : state.stack = stack = stack->parent;
720 : 0 : continue;
721 : : }
722 : :
1726 heikki.linnakangas@i 723 [ + + + - ]:CBC 1637850 : if ((stack->blkno != GIST_ROOT_BLKNO &&
724 : 587372 : stack->parent->lsn < GistPageGetNSN(stack->page)) ||
725 [ - + ]: 1050478 : GistPageIsDeleted(stack->page))
726 : : {
727 : : /*
728 : : * Concurrent split or page deletion detected. There's no
729 : : * guarantee that the downlink for this page is consistent with
730 : : * the tuple we're inserting anymore, so go back to parent and
731 : : * rechoose the best child.
732 : : */
4861 heikki.linnakangas@i 733 :UBC 0 : UnlockReleaseBuffer(stack->buffer);
734 : 0 : xlocked = false;
735 : 0 : state.stack = stack = stack->parent;
6866 teodor@sigaev.ru 736 : 0 : continue;
737 : : }
738 : :
4861 heikki.linnakangas@i 739 [ + + ]:CBC 1050478 : if (!GistPageIsLeaf(stack->page))
740 : : {
741 : : /*
742 : : * This is an internal page so continue to walk down the tree.
743 : : * Find the child node that has the minimum insertion penalty.
744 : : */
745 : : BlockNumber childblkno;
746 : : IndexTuple newtup;
747 : : GISTInsertStack *item;
748 : : OffsetNumber downlinkoffnum;
749 : :
4657 750 : 587432 : downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate);
751 : 587432 : iid = PageGetItemId(stack->page, downlinkoffnum);
4861 752 : 587432 : idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
753 : 587432 : childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
754 : :
755 : : /*
756 : : * Check that it's not a leftover invalid tuple from pre-9.1
757 : : */
758 [ - + ]: 587432 : if (GistTupleIsInvalid(idxtuple))
4861 heikki.linnakangas@i 759 [ # # ]:UBC 0 : ereport(ERROR,
760 : : (errmsg("index \"%s\" contains an inner tuple marked as invalid",
761 : : RelationGetRelationName(r)),
762 : : errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
763 : : errhint("Please REINDEX it.")));
764 : :
765 : : /*
766 : : * Check that the key representing the target child node is
767 : : * consistent with the key we're inserting. Update it if it's not.
768 : : */
4861 heikki.linnakangas@i 769 :CBC 587432 : newtup = gistgetadjusted(state.r, idxtuple, itup, giststate);
770 [ + + ]: 587432 : if (newtup)
771 : : {
772 : : /*
773 : : * Swap shared lock for an exclusive one. Beware, the page may
774 : : * change while we unlock/lock the page...
775 : : */
776 [ + - ]: 331668 : if (!xlocked)
777 : : {
778 : 331668 : LockBuffer(stack->buffer, GIST_UNLOCK);
779 : 331668 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
780 : 331668 : xlocked = true;
2916 kgrittn@postgresql.o 781 : 331668 : stack->page = (Page) BufferGetPage(stack->buffer);
782 : :
4125 alvherre@alvh.no-ip. 783 [ - + ]: 331668 : if (PageGetLSN(stack->page) != stack->lsn)
784 : : {
785 : : /* the page was changed while we unlocked it, retry */
4861 heikki.linnakangas@i 786 :UBC 0 : continue;
787 : : }
788 : : }
789 : :
790 : : /*
791 : : * Update the tuple.
792 : : *
793 : : * We still hold the lock after gistinserttuple(), but it
794 : : * might have to split the page to make the updated tuple fit.
795 : : * In that case the updated tuple might migrate to the other
796 : : * half of the split, so we have to go back to the parent and
797 : : * descend back to the half that's a better fit for the new
798 : : * tuple.
799 : : */
4356 heikki.linnakangas@i 800 [ + + ]:CBC 331668 : if (gistinserttuple(&state, stack, giststate, newtup,
801 : : downlinkoffnum))
802 : : {
803 : : /*
804 : : * If this was a root split, the root page continues to be
805 : : * the parent and the updated tuple went to one of the
806 : : * child pages, so we just need to retry from the root
807 : : * page.
808 : : */
4844 809 [ + + ]: 78 : if (stack->blkno != GIST_ROOT_BLKNO)
810 : : {
811 : 77 : UnlockReleaseBuffer(stack->buffer);
812 : 77 : xlocked = false;
813 : 77 : state.stack = stack = stack->parent;
814 : : }
4861 815 : 78 : continue;
816 : : }
817 : : }
818 : 587354 : LockBuffer(stack->buffer, GIST_UNLOCK);
819 : 587354 : xlocked = false;
820 : :
821 : : /* descend to the chosen child */
822 : 587354 : item = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
823 : 587354 : item->blkno = childblkno;
824 : 587354 : item->parent = stack;
4657 825 : 587354 : item->downlinkoffnum = downlinkoffnum;
4861 826 : 587354 : state.stack = stack = item;
827 : : }
828 : : else
829 : : {
830 : : /*
831 : : * Leaf page. Insert the new key. We've already updated all the
832 : : * parents on the way down, but we might have to split the page if
833 : : * it doesn't fit. gistinserttuple() will take care of that.
834 : : */
835 : :
836 : : /*
837 : : * Swap shared lock for an exclusive one. Be careful, the page may
838 : : * change while we unlock/lock the page...
839 : : */
840 [ + - ]: 463046 : if (!xlocked)
841 : : {
842 : 463046 : LockBuffer(stack->buffer, GIST_UNLOCK);
843 : 463046 : LockBuffer(stack->buffer, GIST_EXCLUSIVE);
844 : 463046 : xlocked = true;
2916 kgrittn@postgresql.o 845 : 463046 : stack->page = (Page) BufferGetPage(stack->buffer);
4861 heikki.linnakangas@i 846 : 463046 : stack->lsn = PageGetLSN(stack->page);
847 : :
848 [ + + ]: 463046 : if (stack->blkno == GIST_ROOT_BLKNO)
849 : : {
850 : : /*
851 : : * the only page that can become inner instead of leaf is
852 : : * the root page, so for root we should recheck it
853 : : */
854 [ - + ]: 26401 : if (!GistPageIsLeaf(stack->page))
855 : : {
856 : : /*
857 : : * very rare situation: during unlock/lock index with
858 : : * number of pages = 1 was increased
859 : : */
4861 heikki.linnakangas@i 860 :UBC 0 : LockBuffer(stack->buffer, GIST_UNLOCK);
861 : 0 : xlocked = false;
862 : 0 : continue;
863 : : }
864 : :
865 : : /*
866 : : * we don't need to check root split, because checking
867 : : * leaf/inner is enough to recognize split for root
868 : : */
869 : : }
1726 heikki.linnakangas@i 870 [ + - + - ]:CBC 873290 : else if ((GistFollowRight(stack->page) ||
1180 871 : 436645 : stack->parent->lsn < GistPageGetNSN(stack->page)) ||
1726 872 [ - + ]: 436645 : GistPageIsDeleted(stack->page))
873 : : {
874 : : /*
875 : : * The page was split or deleted while we momentarily
876 : : * unlocked the page. Go back to parent.
877 : : */
4861 heikki.linnakangas@i 878 :UBC 0 : UnlockReleaseBuffer(stack->buffer);
879 : 0 : xlocked = false;
880 : 0 : state.stack = stack = stack->parent;
881 : 0 : continue;
882 : : }
883 : : }
884 : :
885 : : /* now state.stack->(page, buffer and blkno) points to leaf page */
886 : :
4356 heikki.linnakangas@i 887 :CBC 463046 : gistinserttuple(&state, stack, giststate, itup,
888 : : InvalidOffsetNumber);
4861 889 : 463040 : LockBuffer(stack->buffer, GIST_UNLOCK);
890 : :
891 : : /* Release any pins we might still hold before exiting */
892 [ + + ]: 1513201 : for (; stack; stack = stack->parent)
893 : 1050161 : ReleaseBuffer(stack->buffer);
6879 teodor@sigaev.ru 894 : 463040 : break;
895 : : }
896 : : }
6866 897 : 463040 : }
898 : :
899 : : /*
900 : : * Traverse the tree to find path from root page to specified "child" block.
901 : : *
902 : : * returns a new insertion stack, starting from the parent of "child", up
903 : : * to the root. *downlinkoffnum is set to the offset of the downlink in the
904 : : * direct parent of child.
905 : : *
906 : : * To prevent deadlocks, this should lock only one page at a time.
907 : : */
908 : : static GISTInsertStack *
4657 heikki.linnakangas@i 909 :UBC 0 : gistFindPath(Relation r, BlockNumber child, OffsetNumber *downlinkoffnum)
910 : : {
911 : : Page page;
912 : : Buffer buffer;
913 : : OffsetNumber i,
914 : : maxoff;
915 : : ItemId iid;
916 : : IndexTuple idxtuple;
917 : : List *fifo;
918 : : GISTInsertStack *top,
919 : : *ptr;
920 : : BlockNumber blkno;
921 : :
922 : 0 : top = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
6866 teodor@sigaev.ru 923 : 0 : top->blkno = GIST_ROOT_BLKNO;
4657 heikki.linnakangas@i 924 : 0 : top->downlinkoffnum = InvalidOffsetNumber;
925 : :
926 : 0 : fifo = list_make1(top);
927 [ # # ]: 0 : while (fifo != NIL)
928 : : {
929 : : /* Get next page to visit */
930 : 0 : top = linitial(fifo);
931 : 0 : fifo = list_delete_first(fifo);
932 : :
6590 tgl@sss.pgh.pa.us 933 : 0 : buffer = ReadBuffer(r, top->blkno);
934 : 0 : LockBuffer(buffer, GIST_SHARE);
6734 935 : 0 : gistcheckpage(r, buffer);
2916 kgrittn@postgresql.o 936 : 0 : page = (Page) BufferGetPage(buffer);
937 : :
6779 bruce@momjian.us 938 [ # # ]: 0 : if (GistPageIsLeaf(page))
939 : : {
940 : : /*
941 : : * Because we scan the index top-down, all the rest of the pages
942 : : * in the queue must be leaf pages as well.
943 : : */
6589 tgl@sss.pgh.pa.us 944 : 0 : UnlockReleaseBuffer(buffer);
4657 heikki.linnakangas@i 945 : 0 : break;
946 : : }
947 : :
948 : : /* currently, internal pages are never deleted */
1726 949 [ # # ]: 0 : Assert(!GistPageIsDeleted(page));
950 : :
2287 alvherre@alvh.no-ip. 951 : 0 : top->lsn = BufferGetLSNAtomic(buffer);
952 : :
953 : : /*
954 : : * If F_FOLLOW_RIGHT is set, the page to the right doesn't have a
955 : : * downlink. This should not normally happen..
956 : : */
4861 heikki.linnakangas@i 957 [ # # ]: 0 : if (GistFollowRight(page))
958 [ # # ]: 0 : elog(ERROR, "concurrent GiST page split was incomplete");
959 : :
4105 960 [ # # # # ]: 0 : if (top->parent && top->parent->lsn < GistPageGetNSN(page) &&
6779 bruce@momjian.us 961 [ # # ]: 0 : GistPageGetOpaque(page)->rightlink != InvalidBlockNumber /* sanity check */ )
962 : : {
963 : : /*
964 : : * Page was split while we looked elsewhere. We didn't see the
965 : : * downlink to the right page when we scanned the parent, so add
966 : : * it to the queue now.
967 : : *
968 : : * Put the right page ahead of the queue, so that we visit it
969 : : * next. That's important, because if this is the lowest internal
970 : : * level, just above leaves, we might already have queued up some
971 : : * leaf pages, and we assume that there can't be any non-leaf
972 : : * pages behind leaf pages.
973 : : */
974 : 0 : ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
6866 teodor@sigaev.ru 975 : 0 : ptr->blkno = GistPageGetOpaque(page)->rightlink;
4657 heikki.linnakangas@i 976 : 0 : ptr->downlinkoffnum = InvalidOffsetNumber;
977 : 0 : ptr->parent = top->parent;
978 : :
979 : 0 : fifo = lcons(ptr, fifo);
980 : : }
981 : :
6866 teodor@sigaev.ru 982 : 0 : maxoff = PageGetMaxOffsetNumber(page);
983 : :
6779 bruce@momjian.us 984 [ # # ]: 0 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
985 : : {
6866 teodor@sigaev.ru 986 : 0 : iid = PageGetItemId(page, i);
987 : 0 : idxtuple = (IndexTuple) PageGetItem(page, iid);
988 : 0 : blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
6779 bruce@momjian.us 989 [ # # ]: 0 : if (blkno == child)
990 : : {
991 : : /* Found it! */
6589 tgl@sss.pgh.pa.us 992 : 0 : UnlockReleaseBuffer(buffer);
4657 heikki.linnakangas@i 993 : 0 : *downlinkoffnum = i;
6866 teodor@sigaev.ru 994 : 0 : return top;
995 : : }
996 : : else
997 : : {
998 : : /* Append this child to the list of pages to visit later */
6779 bruce@momjian.us 999 : 0 : ptr = (GISTInsertStack *) palloc0(sizeof(GISTInsertStack));
6866 teodor@sigaev.ru 1000 : 0 : ptr->blkno = blkno;
4657 heikki.linnakangas@i 1001 : 0 : ptr->downlinkoffnum = i;
6866 teodor@sigaev.ru 1002 : 0 : ptr->parent = top;
1003 : :
4657 heikki.linnakangas@i 1004 : 0 : fifo = lappend(fifo, ptr);
1005 : : }
1006 : : }
1007 : :
6589 tgl@sss.pgh.pa.us 1008 : 0 : UnlockReleaseBuffer(buffer);
1009 : : }
1010 : :
4657 heikki.linnakangas@i 1011 [ # # ]: 0 : elog(ERROR, "failed to re-find parent of a page in index \"%s\", block %u",
1012 : : RelationGetRelationName(r), child);
1013 : : return NULL; /* keep compiler quiet */
1014 : : }
1015 : :
1016 : : /*
1017 : : * Updates the stack so that child->parent is the correct parent of the
1018 : : * child. child->parent must be exclusively locked on entry, and will
1019 : : * remain so at exit, but it might not be the same page anymore.
1020 : : */
1021 : : static void
201 heikki.linnakangas@i 1022 :CBC 12222 : gistFindCorrectParent(Relation r, GISTInsertStack *child, bool is_build)
1023 : : {
6779 bruce@momjian.us 1024 : 12222 : GISTInsertStack *parent = child->parent;
1025 : : ItemId iid;
1026 : : IndexTuple idxtuple;
1027 : : OffsetNumber maxoff;
1028 : : GISTInsertStack *ptr;
1029 : :
6734 tgl@sss.pgh.pa.us 1030 : 12222 : gistcheckpage(r, parent->buffer);
2916 kgrittn@postgresql.o 1031 : 12222 : parent->page = (Page) BufferGetPage(parent->buffer);
201 heikki.linnakangas@i 1032 : 12222 : maxoff = PageGetMaxOffsetNumber(parent->page);
1033 : :
1034 : : /* Check if the downlink is still where it was before */
1035 [ + + + - ]: 12222 : if (child->downlinkoffnum != InvalidOffsetNumber && child->downlinkoffnum <= maxoff)
1036 : : {
1037 : 12220 : iid = PageGetItemId(parent->page, child->downlinkoffnum);
1038 : 12220 : idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1039 [ + - ]: 12220 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1040 : 12220 : return; /* still there */
1041 : : }
1042 : :
1043 : : /*
1044 : : * The page has changed since we looked. During normal operation, every
1045 : : * update of a page changes its LSN, so the LSN we memorized should have
1046 : : * changed too. During index build, however, we don't WAL-log the changes
1047 : : * until we have built the index, so the LSN doesn't change. There is no
1048 : : * concurrent activity during index build, but we might have changed the
1049 : : * parent ourselves.
1050 : : */
1051 [ - + + - ]: 2 : Assert(parent->lsn != PageGetLSN(parent->page) || is_build);
1052 : :
1053 : : /*
1054 : : * Scan the page to re-find the downlink. If the page was split, it might
1055 : : * have moved to a different page, so follow the right links until we find
1056 : : * it.
1057 : : */
1058 : : while (true)
1059 : 1 : {
1060 : : OffsetNumber i;
1061 : :
1062 : 3 : maxoff = PageGetMaxOffsetNumber(parent->page);
1063 [ + + ]: 12 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
1064 : : {
1065 : 11 : iid = PageGetItemId(parent->page, i);
1066 : 11 : idxtuple = (IndexTuple) PageGetItem(parent->page, iid);
1067 [ + + ]: 11 : if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == child->blkno)
1068 : : {
1069 : : /* yes!!, found */
1070 : 2 : child->downlinkoffnum = i;
1071 : 2 : return;
1072 : : }
1073 : : }
1074 : :
1075 : 1 : parent->blkno = GistPageGetOpaque(parent->page)->rightlink;
1076 : 1 : parent->downlinkoffnum = InvalidOffsetNumber;
1077 : 1 : UnlockReleaseBuffer(parent->buffer);
1078 [ - + ]: 1 : if (parent->blkno == InvalidBlockNumber)
1079 : : {
1080 : : /*
1081 : : * End of chain and still didn't find parent. It's a very-very
1082 : : * rare situation when the root was split.
1083 : : */
201 heikki.linnakangas@i 1084 :UBC 0 : break;
1085 : : }
201 heikki.linnakangas@i 1086 :CBC 1 : parent->buffer = ReadBuffer(r, parent->blkno);
1087 : 1 : LockBuffer(parent->buffer, GIST_EXCLUSIVE);
1088 : 1 : gistcheckpage(r, parent->buffer);
1089 : 1 : parent->page = (Page) BufferGetPage(parent->buffer);
1090 : : }
1091 : :
1092 : : /*
1093 : : * awful!!, we need search tree to find parent ... , but before we should
1094 : : * release all old parent
1095 : : */
1096 : :
201 heikki.linnakangas@i 1097 :UBC 0 : ptr = child->parent->parent; /* child->parent already released above */
1098 [ # # ]: 0 : while (ptr)
1099 : : {
1100 : 0 : ReleaseBuffer(ptr->buffer);
1101 : 0 : ptr = ptr->parent;
1102 : : }
1103 : :
1104 : : /* ok, find new path */
1105 : 0 : ptr = parent = gistFindPath(r, child->blkno, &child->downlinkoffnum);
1106 : :
1107 : : /* read all buffers as expected by caller */
1108 : : /* note we don't lock them or gistcheckpage them here! */
1109 [ # # ]: 0 : while (ptr)
1110 : : {
1111 : 0 : ptr->buffer = ReadBuffer(r, ptr->blkno);
1112 : 0 : ptr->page = (Page) BufferGetPage(ptr->buffer);
1113 : 0 : ptr = ptr->parent;
1114 : : }
1115 : :
1116 : : /* install new chain of parents to stack */
1117 : 0 : child->parent = parent;
1118 : :
1119 : : /* make recursive call to normal processing */
1120 : 0 : LockBuffer(child->parent->buffer, GIST_EXCLUSIVE);
1121 : 0 : gistFindCorrectParent(r, child, is_build);
1122 : : }
1123 : :
1124 : : /*
1125 : : * Form a downlink pointer for the page in 'buf'.
1126 : : */
1127 : : static IndexTuple
4861 1128 : 0 : gistformdownlink(Relation rel, Buffer buf, GISTSTATE *giststate,
1129 : : GISTInsertStack *stack, bool is_build)
1130 : : {
2916 kgrittn@postgresql.o 1131 : 0 : Page page = BufferGetPage(buf);
1132 : : OffsetNumber maxoff;
1133 : : OffsetNumber offset;
4753 bruce@momjian.us 1134 : 0 : IndexTuple downlink = NULL;
1135 : :
4861 heikki.linnakangas@i 1136 : 0 : maxoff = PageGetMaxOffsetNumber(page);
1137 [ # # ]: 0 : for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset))
1138 : : {
1139 : : IndexTuple ituple = (IndexTuple)
331 tgl@sss.pgh.pa.us 1140 : 0 : PageGetItem(page, PageGetItemId(page, offset));
1141 : :
4861 heikki.linnakangas@i 1142 [ # # ]: 0 : if (downlink == NULL)
1143 : 0 : downlink = CopyIndexTuple(ituple);
1144 : : else
1145 : : {
1146 : : IndexTuple newdownlink;
1147 : :
1148 : 0 : newdownlink = gistgetadjusted(rel, downlink, ituple,
1149 : : giststate);
1150 [ # # ]: 0 : if (newdownlink)
1151 : 0 : downlink = newdownlink;
1152 : : }
1153 : : }
1154 : :
1155 : : /*
1156 : : * If the page is completely empty, we can't form a meaningful downlink
1157 : : * for it. But we have to insert a downlink for the page. Any key will do,
1158 : : * as long as its consistent with the downlink of parent page, so that we
1159 : : * can legally insert it to the parent. A minimal one that matches as few
1160 : : * scans as possible would be best, to keep scans from doing useless work,
1161 : : * but we don't know how to construct that. So we just use the downlink of
1162 : : * the original page that was split - that's as far from optimal as it can
1163 : : * get but will do..
1164 : : */
1165 [ # # ]: 0 : if (!downlink)
1166 : : {
1167 : : ItemId iid;
1168 : :
1169 : 0 : LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
201 1170 : 0 : gistFindCorrectParent(rel, stack, is_build);
4657 1171 : 0 : iid = PageGetItemId(stack->parent->page, stack->downlinkoffnum);
4861 1172 : 0 : downlink = (IndexTuple) PageGetItem(stack->parent->page, iid);
1173 : 0 : downlink = CopyIndexTuple(downlink);
1174 : 0 : LockBuffer(stack->parent->buffer, GIST_UNLOCK);
1175 : : }
1176 : :
1177 : 0 : ItemPointerSetBlockNumber(&(downlink->t_tid), BufferGetBlockNumber(buf));
1178 : 0 : GistTupleSetValid(downlink);
1179 : :
1180 : 0 : return downlink;
1181 : : }
1182 : :
1183 : :
1184 : : /*
1185 : : * Complete the incomplete split of state->stack->page.
1186 : : */
1187 : : static void
1188 : 0 : gistfixsplit(GISTInsertState *state, GISTSTATE *giststate)
1189 : : {
1190 : 0 : GISTInsertStack *stack = state->stack;
1191 : : Buffer buf;
1192 : : Page page;
1193 : 0 : List *splitinfo = NIL;
1194 : :
1227 peter@eisentraut.org 1195 [ # # ]: 0 : ereport(LOG,
1196 : : (errmsg("fixing incomplete split in index \"%s\", block %u",
1197 : : RelationGetRelationName(state->r), stack->blkno)));
1198 : :
4861 heikki.linnakangas@i 1199 [ # # ]: 0 : Assert(GistFollowRight(stack->page));
4657 1200 [ # # # # : 0 : Assert(OffsetNumberIsValid(stack->downlinkoffnum));
# # ]
1201 : :
4861 1202 : 0 : buf = stack->buffer;
1203 : :
1204 : : /*
1205 : : * Read the chain of split pages, following the rightlinks. Construct a
1206 : : * downlink tuple for each page.
1207 : : */
1208 : : for (;;)
1209 : 0 : {
1210 : 0 : GISTPageSplitInfo *si = palloc(sizeof(GISTPageSplitInfo));
1211 : : IndexTuple downlink;
1212 : :
2916 kgrittn@postgresql.o 1213 : 0 : page = BufferGetPage(buf);
1214 : :
1215 : : /* Form the new downlink tuples to insert to parent */
201 heikki.linnakangas@i 1216 : 0 : downlink = gistformdownlink(state->r, buf, giststate, stack, state->is_build);
1217 : :
4861 1218 : 0 : si->buf = buf;
1219 : 0 : si->downlink = downlink;
1220 : :
1221 : 0 : splitinfo = lappend(splitinfo, si);
1222 : :
1223 [ # # ]: 0 : if (GistFollowRight(page))
1224 : : {
1225 : : /* lock next page */
1226 : 0 : buf = ReadBuffer(state->r, GistPageGetOpaque(page)->rightlink);
1227 : 0 : LockBuffer(buf, GIST_EXCLUSIVE);
1228 : : }
1229 : : else
1230 : 0 : break;
1231 : : }
1232 : :
1233 : : /* Insert the downlinks */
4356 1234 : 0 : gistfinishsplit(state, stack, giststate, splitinfo, false);
4861 1235 : 0 : }
1236 : :
1237 : : /*
1238 : : * Insert or replace a tuple in stack->buffer. If 'oldoffnum' is valid, the
1239 : : * tuple at 'oldoffnum' is replaced, otherwise the tuple is inserted as new.
1240 : : * 'stack' represents the path from the root to the page being updated.
1241 : : *
1242 : : * The caller must hold an exclusive lock on stack->buffer. The lock is still
1243 : : * held on return, but the page might not contain the inserted tuple if the
1244 : : * page was split. The function returns true if the page was split, false
1245 : : * otherwise.
1246 : : */
1247 : : static bool
4356 heikki.linnakangas@i 1248 :CBC 794714 : gistinserttuple(GISTInsertState *state, GISTInsertStack *stack,
1249 : : GISTSTATE *giststate, IndexTuple tuple, OffsetNumber oldoffnum)
1250 : : {
1251 : 794714 : return gistinserttuples(state, stack, giststate, &tuple, 1, oldoffnum,
1252 : : InvalidBuffer, InvalidBuffer, false, false);
1253 : : }
1254 : :
1255 : : /* ----------------
1256 : : * An extended workhorse version of gistinserttuple(). This version allows
1257 : : * inserting multiple tuples, or replacing a single tuple with multiple tuples.
1258 : : * This is used to recursively update the downlinks in the parent when a page
1259 : : * is split.
1260 : : *
1261 : : * If leftchild and rightchild are valid, we're inserting/replacing the
1262 : : * downlink for rightchild, and leftchild is its left sibling. We clear the
1263 : : * F_FOLLOW_RIGHT flag and update NSN on leftchild, atomically with the
1264 : : * insertion of the downlink.
1265 : : *
1266 : : * To avoid holding locks for longer than necessary, when recursing up the
1267 : : * tree to update the parents, the locking is a bit peculiar here. On entry,
1268 : : * the caller must hold an exclusive lock on stack->buffer, as well as
1269 : : * leftchild and rightchild if given. On return:
1270 : : *
1271 : : * - Lock on stack->buffer is released, if 'unlockbuf' is true. The page is
1272 : : * always kept pinned, however.
1273 : : * - Lock on 'leftchild' is released, if 'unlockleftchild' is true. The page
1274 : : * is kept pinned.
1275 : : * - Lock and pin on 'rightchild' are always released.
1276 : : *
1277 : : * Returns 'true' if the page had to be split. Note that if the page was
1278 : : * split, the inserted/updated tuples might've been inserted to a right
1279 : : * sibling of stack->buffer instead of stack->buffer itself.
1280 : : */
1281 : : static bool
4861 1282 : 806936 : gistinserttuples(GISTInsertState *state, GISTInsertStack *stack,
1283 : : GISTSTATE *giststate,
1284 : : IndexTuple *tuples, int ntup, OffsetNumber oldoffnum,
1285 : : Buffer leftchild, Buffer rightchild,
1286 : : bool unlockbuf, bool unlockleftchild)
1287 : : {
1288 : : List *splitinfo;
1289 : : bool is_split;
1290 : :
1291 : : /*
1292 : : * Check for any rw conflicts (in serializable isolation level) just
1293 : : * before we intend to modify the page
1294 : : */
1538 tmunro@postgresql.or 1295 : 806936 : CheckForSerializableConflictIn(state->r, NULL, BufferGetBlockNumber(stack->buffer));
1296 : :
1297 : : /* Insert the tuple(s) to the page, splitting the page if necessary */
4602 heikki.linnakangas@i 1298 : 806930 : is_split = gistplacetopage(state->r, state->freespace, giststate,
1299 : : stack->buffer,
1300 : : tuples, ntup,
1301 : : oldoffnum, NULL,
1302 : : leftchild,
1303 : : &splitinfo,
1304 : : true,
1305 : : state->heapRel,
1838 1306 : 806930 : state->is_build);
1307 : :
1308 : : /*
1309 : : * Before recursing up in case the page was split, release locks on the
1310 : : * child pages. We don't need to keep them locked when updating the
1311 : : * parent.
1312 : : */
4356 1313 [ + + ]: 806930 : if (BufferIsValid(rightchild))
1314 : 12222 : UnlockReleaseBuffer(rightchild);
1315 [ + + + + ]: 806930 : if (BufferIsValid(leftchild) && unlockleftchild)
1316 : 2522 : LockBuffer(leftchild, GIST_UNLOCK);
1317 : :
1318 : : /*
1319 : : * If we had to split, insert/update the downlinks in the parent. If the
1320 : : * caller requested us to release the lock on stack->buffer, tell
1321 : : * gistfinishsplit() to do that as soon as it's safe to do so. If we
1322 : : * didn't have to split, release it ourselves.
1323 : : */
4861 1324 [ + + ]: 806930 : if (splitinfo)
4356 1325 : 12101 : gistfinishsplit(state, stack, giststate, splitinfo, unlockbuf);
1326 [ + + ]: 794829 : else if (unlockbuf)
1327 : 9579 : LockBuffer(stack->buffer, GIST_UNLOCK);
1328 : :
4861 1329 : 806930 : return is_split;
1330 : : }
1331 : :
1332 : : /*
1333 : : * Finish an incomplete split by inserting/updating the downlinks in parent
1334 : : * page. 'splitinfo' contains all the child pages involved in the split,
1335 : : * from left-to-right.
1336 : : *
1337 : : * On entry, the caller must hold a lock on stack->buffer and all the child
1338 : : * pages in 'splitinfo'. If 'unlockbuf' is true, the lock on stack->buffer is
1339 : : * released on return. The child pages are always unlocked and unpinned.
1340 : : */
1341 : : static void
1342 : 12101 : gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
1343 : : GISTSTATE *giststate, List *splitinfo, bool unlockbuf)
1344 : : {
1345 : : GISTPageSplitInfo *right;
1346 : : GISTPageSplitInfo *left;
1347 : : IndexTuple tuples[2];
1348 : :
1349 : : /* A split always contains at least two halves */
1350 [ - + ]: 12101 : Assert(list_length(splitinfo) >= 2);
1351 : :
1352 : : /*
1353 : : * We need to insert downlinks for each new page, and update the downlink
1354 : : * for the original (leftmost) page in the split. Begin at the rightmost
1355 : : * page, inserting one downlink at a time until there's only two pages
1356 : : * left. Finally insert the downlink for the last new page and update the
1357 : : * downlink for the original page as one operation.
1358 : : */
1359 : 12101 : LockBuffer(stack->parent->buffer, GIST_EXCLUSIVE);
1360 : :
1361 : : /*
1362 : : * Insert downlinks for the siblings from right to left, until there are
1363 : : * only two siblings left.
1364 : : */
1733 tgl@sss.pgh.pa.us 1365 [ + + ]: 12222 : for (int pos = list_length(splitinfo) - 1; pos > 1; pos--)
1366 : : {
1367 : 121 : right = (GISTPageSplitInfo *) list_nth(splitinfo, pos);
1368 : 121 : left = (GISTPageSplitInfo *) list_nth(splitinfo, pos - 1);
1369 : :
201 heikki.linnakangas@i 1370 : 121 : gistFindCorrectParent(state->r, stack, state->is_build);
4861 1371 [ + + ]: 121 : if (gistinserttuples(state, stack->parent, giststate,
1372 : : &right->downlink, 1,
1373 : : InvalidOffsetNumber,
1374 : : left->buf, right->buf, false, false))
1375 : : {
1376 : : /*
1377 : : * If the parent page was split, the existing downlink might have
1378 : : * moved.
1379 : : */
1584 1380 : 1 : stack->downlinkoffnum = InvalidOffsetNumber;
1381 : : }
1382 : : /* gistinserttuples() released the lock on right->buf. */
1383 : : }
1384 : :
1733 tgl@sss.pgh.pa.us 1385 : 12101 : right = (GISTPageSplitInfo *) lsecond(splitinfo);
1386 : 12101 : left = (GISTPageSplitInfo *) linitial(splitinfo);
1387 : :
1388 : : /*
1389 : : * Finally insert downlink for the remaining right page and update the
1390 : : * downlink for the original page to not contain the tuples that were
1391 : : * moved to the new pages.
1392 : : */
4861 heikki.linnakangas@i 1393 : 12101 : tuples[0] = left->downlink;
1394 : 12101 : tuples[1] = right->downlink;
201 1395 : 12101 : gistFindCorrectParent(state->r, stack, state->is_build);
1396 : 12101 : (void) gistinserttuples(state, stack->parent, giststate,
1397 : : tuples, 2,
1398 : 12101 : stack->downlinkoffnum,
1399 : : left->buf, right->buf,
1400 : : true, /* Unlock parent */
1401 : : unlockbuf /* Unlock stack->buffer if caller
1402 : : * wants that */
1403 : : );
1404 : :
1405 : : /*
1406 : : * The downlink might have moved when we updated it. Even if the page
1407 : : * wasn't split, because gistinserttuples() implements updating the old
1408 : : * tuple by removing and re-inserting it!
1409 : : */
1410 : 12101 : stack->downlinkoffnum = InvalidOffsetNumber;
1411 : :
4861 1412 [ - + ]: 12101 : Assert(left->buf == stack->buffer);
1413 : :
1414 : : /*
1415 : : * If we split the page because we had to adjust the downlink on an
1416 : : * internal page, while descending the tree for inserting a new tuple,
1417 : : * then this might no longer be the correct page for the new tuple. The
1418 : : * downlink to this page might not cover the new tuple anymore, it might
1419 : : * need to go to the newly-created right sibling instead. Tell the caller
1420 : : * to walk back up the stack, to re-check at the parent which page to
1421 : : * insert to.
1422 : : *
1423 : : * Normally, the LSN-NSN interlock during the tree descend would also
1424 : : * detect that a concurrent split happened (by ourselves), and cause us to
1425 : : * retry at the parent. But that mechanism doesn't work during index
1426 : : * build, because we don't do WAL-logging, and don't update LSNs, during
1427 : : * index build.
1428 : : */
1797 1429 : 12101 : stack->retry_from_parent = true;
6873 teodor@sigaev.ru 1430 : 12101 : }
1431 : :
1432 : : /*
1433 : : * gistSplit -- split a page in the tree and fill struct
1434 : : * used for XLOG and real writes buffers. Function is recursive, ie
1435 : : * it will split page until keys will fit in every page.
1436 : : */
1437 : : SplitPageLayout *
10093 scrappy@hub.org 1438 : 13376 : gistSplit(Relation r,
1439 : : Page page,
1440 : : IndexTuple *itup, /* contains compressed entry */
1441 : : int len,
1442 : : GISTSTATE *giststate)
1443 : : {
1444 : : IndexTuple *lvectup,
1445 : : *rvectup;
1446 : : GistSplitVector v;
1447 : : int i;
103 rhaas@postgresql.org 1448 :GNC 13376 : SplitPageLayout *res = NULL;
1449 : :
1450 : : /* this should never recurse very deeply, but better safe than sorry */
3481 heikki.linnakangas@i 1451 :CBC 13376 : check_stack_depth();
1452 : :
1453 : : /* there's no point in splitting an empty page */
1454 [ - + ]: 13376 : Assert(len > 0);
1455 : :
1456 : : /*
1457 : : * If a single tuple doesn't fit on a page, no amount of splitting will
1458 : : * help.
1459 : : */
1460 [ - + ]: 13376 : if (len == 1)
3481 heikki.linnakangas@i 1461 [ # # ]:UBC 0 : ereport(ERROR,
1462 : : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1463 : : errmsg("index row size %zu exceeds maximum %zu for index \"%s\"",
1464 : : IndexTupleSize(itup[0]), GiSTPageSize,
1465 : : RelationGetRelationName(r))));
1466 : :
1862 akorotkov@postgresql 1467 :CBC 13376 : memset(v.spl_lisnull, true,
1468 : 13376 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
1469 : 13376 : memset(v.spl_risnull, true,
1470 : 13376 : sizeof(bool) * giststate->nonLeafTupdesc->natts);
4081 tgl@sss.pgh.pa.us 1471 : 13376 : gistSplitByKey(r, page, itup, len, giststate, &v, 0);
1472 : :
1473 : : /* form left and right vector */
6549 teodor@sigaev.ru 1474 : 13376 : lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1475 : 13376 : rvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
1476 : :
6500 1477 [ + + ]: 549144 : for (i = 0; i < v.splitVector.spl_nleft; i++)
1478 : 535768 : lvectup[i] = itup[v.splitVector.spl_left[i] - 1];
1479 : :
1480 [ + + ]: 614364 : for (i = 0; i < v.splitVector.spl_nright; i++)
1481 : 600988 : rvectup[i] = itup[v.splitVector.spl_right[i] - 1];
1482 : :
1483 : : /* finalize splitting (may need another split) */
1484 [ + + ]: 13376 : if (!gistfitpage(rvectup, v.splitVector.spl_nright))
1485 : : {
1486 : 296 : res = gistSplit(r, page, rvectup, v.splitVector.spl_nright, giststate);
1487 : : }
1488 : : else
1489 : : {
6549 1490 : 13080 : ROTATEDIST(res);
6500 1491 : 13080 : res->block.num = v.splitVector.spl_nright;
6402 bruce@momjian.us 1492 : 13080 : res->list = gistfillitupvec(rvectup, v.splitVector.spl_nright, &(res->lenlist));
4861 heikki.linnakangas@i 1493 : 13080 : res->itup = gistFormTuple(giststate, r, v.spl_rattr, v.spl_risnull, false);
1494 : : }
1495 : :
6500 teodor@sigaev.ru 1496 [ + + ]: 13376 : if (!gistfitpage(lvectup, v.splitVector.spl_nleft))
1497 : : {
1498 : : SplitPageLayout *resptr,
1499 : : *subres;
1500 : :
1501 : 252 : resptr = subres = gistSplit(r, page, lvectup, v.splitVector.spl_nleft, giststate);
1502 : :
1503 : : /* install on list's tail */
6402 bruce@momjian.us 1504 [ + + ]: 585 : while (resptr->next)
6549 teodor@sigaev.ru 1505 : 333 : resptr = resptr->next;
1506 : :
1507 : 252 : resptr->next = res;
1508 : 252 : res = subres;
1509 : : }
1510 : : else
1511 : : {
1512 : 13124 : ROTATEDIST(res);
6500 1513 : 13124 : res->block.num = v.splitVector.spl_nleft;
6402 bruce@momjian.us 1514 : 13124 : res->list = gistfillitupvec(lvectup, v.splitVector.spl_nleft, &(res->lenlist));
4861 heikki.linnakangas@i 1515 : 13124 : res->itup = gistFormTuple(giststate, r, v.spl_lattr, v.spl_lisnull, false);
1516 : : }
1517 : :
6549 teodor@sigaev.ru 1518 : 13376 : return res;
1519 : : }
1520 : :
1521 : : /*
1522 : : * Create a GISTSTATE and fill it with information about the index
1523 : : */
1524 : : GISTSTATE *
4580 tgl@sss.pgh.pa.us 1525 : 3504 : initGISTstate(Relation index)
1526 : : {
1527 : : GISTSTATE *giststate;
1528 : : MemoryContext scanCxt;
1529 : : MemoryContext oldCxt;
1530 : : int i;
1531 : :
1532 : : /* safety check to protect fixed-size arrays in GISTSTATE */
8272 1533 [ - + ]: 3504 : if (index->rd_att->natts > INDEX_MAX_KEYS)
7573 tgl@sss.pgh.pa.us 1534 [ # # ]:UBC 0 : elog(ERROR, "numberOfAttributes %d > %d",
1535 : : index->rd_att->natts, INDEX_MAX_KEYS);
1536 : :
1537 : : /* Create the memory context that will hold the GISTSTATE */
4580 tgl@sss.pgh.pa.us 1538 :CBC 3504 : scanCxt = AllocSetContextCreate(CurrentMemoryContext,
1539 : : "GiST scan context",
1540 : : ALLOCSET_DEFAULT_SIZES);
1541 : 3504 : oldCxt = MemoryContextSwitchTo(scanCxt);
1542 : :
1543 : : /* Create and fill in the GISTSTATE */
1544 : 3504 : giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE));
1545 : :
1546 : 3504 : giststate->scanCxt = scanCxt;
2489 1547 : 3504 : giststate->tempCxt = scanCxt; /* caller must change this if needed */
1862 akorotkov@postgresql 1548 : 3504 : giststate->leafTupdesc = index->rd_att;
1549 : :
1550 : : /*
1551 : : * The truncated tupdesc for non-leaf index tuples, which doesn't contain
1552 : : * the INCLUDE attributes.
1553 : : *
1554 : : * It is used to form tuples during tuple adjustment and page split.
1555 : : * B-tree creates shortened tuple descriptor for every truncated tuple,
1556 : : * because it is doing this less often: it does not have to form truncated
1557 : : * tuples during page split. Also, B-tree is not adjusting tuples on
1558 : : * internal pages the way GiST does.
1559 : : */
1560 : 3504 : giststate->nonLeafTupdesc = CreateTupleDescCopyConstr(index->rd_att);
1561 : 3504 : giststate->nonLeafTupdesc->natts =
1562 : 3504 : IndexRelationGetNumberOfKeyAttributes(index);
1563 : :
1564 [ + + ]: 8893 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(index); i++)
1565 : : {
8226 tgl@sss.pgh.pa.us 1566 : 5389 : fmgr_info_copy(&(giststate->consistentFn[i]),
6907 neilc@samurai.com 1567 : 5389 : index_getprocinfo(index, i + 1, GIST_CONSISTENT_PROC),
1568 : : scanCxt);
8226 tgl@sss.pgh.pa.us 1569 : 5389 : fmgr_info_copy(&(giststate->unionFn[i]),
8207 bruce@momjian.us 1570 : 5389 : index_getprocinfo(index, i + 1, GIST_UNION_PROC),
1571 : : scanCxt);
1572 : :
1573 : : /* opclasses are not required to provide a Compress method */
2399 tgl@sss.pgh.pa.us 1574 [ + + ]: 5389 : if (OidIsValid(index_getprocid(index, i + 1, GIST_COMPRESS_PROC)))
1575 : 2002 : fmgr_info_copy(&(giststate->compressFn[i]),
1576 : 2002 : index_getprocinfo(index, i + 1, GIST_COMPRESS_PROC),
1577 : : scanCxt);
1578 : : else
1579 : 3387 : giststate->compressFn[i].fn_oid = InvalidOid;
1580 : :
1581 : : /* opclasses are not required to provide a Decompress method */
1582 [ + + ]: 5389 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DECOMPRESS_PROC)))
1583 : 796 : fmgr_info_copy(&(giststate->decompressFn[i]),
1584 : 796 : index_getprocinfo(index, i + 1, GIST_DECOMPRESS_PROC),
1585 : : scanCxt);
1586 : : else
1587 : 4593 : giststate->decompressFn[i].fn_oid = InvalidOid;
1588 : :
8226 1589 : 5389 : fmgr_info_copy(&(giststate->penaltyFn[i]),
8207 bruce@momjian.us 1590 : 5389 : index_getprocinfo(index, i + 1, GIST_PENALTY_PROC),
1591 : : scanCxt);
8226 tgl@sss.pgh.pa.us 1592 : 5389 : fmgr_info_copy(&(giststate->picksplitFn[i]),
6907 neilc@samurai.com 1593 : 5389 : index_getprocinfo(index, i + 1, GIST_PICKSPLIT_PROC),
1594 : : scanCxt);
8226 tgl@sss.pgh.pa.us 1595 : 5389 : fmgr_info_copy(&(giststate->equalFn[i]),
8207 bruce@momjian.us 1596 : 5389 : index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
1597 : : scanCxt);
1598 : :
1599 : : /* opclasses are not required to provide a Distance method */
4881 tgl@sss.pgh.pa.us 1600 [ + + ]: 5389 : if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
1601 : 953 : fmgr_info_copy(&(giststate->distanceFn[i]),
2489 1602 : 953 : index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
1603 : : scanCxt);
1604 : : else
4881 1605 : 4436 : giststate->distanceFn[i].fn_oid = InvalidOid;
1606 : :
1607 : : /* opclasses are not required to provide a Fetch method */
3307 heikki.linnakangas@i 1608 [ + + ]: 5389 : if (OidIsValid(index_getprocid(index, i + 1, GIST_FETCH_PROC)))
1609 : 679 : fmgr_info_copy(&(giststate->fetchFn[i]),
3249 bruce@momjian.us 1610 : 679 : index_getprocinfo(index, i + 1, GIST_FETCH_PROC),
1611 : : scanCxt);
1612 : : else
3307 heikki.linnakangas@i 1613 : 4710 : giststate->fetchFn[i].fn_oid = InvalidOid;
1614 : :
1615 : : /*
1616 : : * If the index column has a specified collation, we should honor that
1617 : : * while doing comparisons. However, we may have a collatable storage
1618 : : * type for a noncollatable indexed data type. If there's no index
1619 : : * collation then specify default collation in case the support
1620 : : * functions need collation. This is harmless if the support
1621 : : * functions don't care about collation, so we just do it
1622 : : * unconditionally. (We could alternatively call get_typcollation,
1623 : : * but that seems like expensive overkill --- there aren't going to be
1624 : : * any cases where a GiST storage type has a nondefault collation.)
1625 : : */
4741 tgl@sss.pgh.pa.us 1626 [ + + ]: 5389 : if (OidIsValid(index->rd_indcollation[i]))
1627 : 96 : giststate->supportCollation[i] = index->rd_indcollation[i];
1628 : : else
1629 : 5293 : giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
1630 : : }
1631 : :
1632 : : /* No opclass information for INCLUDE attributes */
1862 akorotkov@postgresql 1633 [ + + ]: 3755 : for (; i < index->rd_att->natts; i++)
1634 : : {
1635 : 251 : giststate->consistentFn[i].fn_oid = InvalidOid;
1636 : 251 : giststate->unionFn[i].fn_oid = InvalidOid;
1637 : 251 : giststate->compressFn[i].fn_oid = InvalidOid;
1638 : 251 : giststate->decompressFn[i].fn_oid = InvalidOid;
1639 : 251 : giststate->penaltyFn[i].fn_oid = InvalidOid;
1640 : 251 : giststate->picksplitFn[i].fn_oid = InvalidOid;
1641 : 251 : giststate->equalFn[i].fn_oid = InvalidOid;
1642 : 251 : giststate->distanceFn[i].fn_oid = InvalidOid;
1643 : 251 : giststate->fetchFn[i].fn_oid = InvalidOid;
1644 : 251 : giststate->supportCollation[i] = InvalidOid;
1645 : : }
1646 : :
4580 tgl@sss.pgh.pa.us 1647 : 3504 : MemoryContextSwitchTo(oldCxt);
1648 : :
1649 : 3504 : return giststate;
1650 : : }
1651 : :
1652 : : void
8207 bruce@momjian.us 1653 : 2802 : freeGISTstate(GISTSTATE *giststate)
1654 : : {
1655 : : /* It's sufficient to delete the scanCxt */
4580 tgl@sss.pgh.pa.us 1656 : 2802 : MemoryContextDelete(giststate->scanCxt);
10093 scrappy@hub.org 1657 : 2802 : }
1658 : :
1659 : : /*
1660 : : * gistprunepage() -- try to remove LP_DEAD items from the given page.
1661 : : * Function assumes that buffer is exclusively locked.
1662 : : */
1663 : : static void
1867 heikki.linnakangas@i 1664 :UBC 0 : gistprunepage(Relation rel, Page page, Buffer buffer, Relation heapRel)
1665 : : {
1666 : : OffsetNumber deletable[MaxIndexTuplesPerPage];
2866 rhaas@postgresql.org 1667 : 0 : int ndeletable = 0;
1668 : : OffsetNumber offnum,
1669 : : maxoff;
1670 : :
3140 teodor@sigaev.ru 1671 [ # # ]: 0 : Assert(GistPageIsLeaf(page));
1672 : :
1673 : : /*
1674 : : * Scan over all items to see which ones need to be deleted according to
1675 : : * LP_DEAD flags.
1676 : : */
1677 : 0 : maxoff = PageGetMaxOffsetNumber(page);
1678 : 0 : for (offnum = FirstOffsetNumber;
1679 [ # # ]: 0 : offnum <= maxoff;
1680 : 0 : offnum = OffsetNumberNext(offnum))
1681 : : {
1682 : 0 : ItemId itemId = PageGetItemId(page, offnum);
1683 : :
1684 [ # # ]: 0 : if (ItemIdIsDead(itemId))
1685 : 0 : deletable[ndeletable++] = offnum;
1686 : : }
1687 : :
1688 [ # # ]: 0 : if (ndeletable > 0)
1689 : : {
514 pg@bowt.ie 1690 : 0 : TransactionId snapshotConflictHorizon = InvalidTransactionId;
1691 : :
1174 1692 [ # # # # : 0 : if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
# # # # #
# ]
1693 : : snapshotConflictHorizon =
1694 : 0 : index_compute_xid_horizon_for_tuples(rel, heapRel, buffer,
1695 : : deletable, ndeletable);
1696 : :
3140 teodor@sigaev.ru 1697 : 0 : START_CRIT_SECTION();
1698 : :
1699 : 0 : PageIndexMultiDelete(page, deletable, ndeletable);
1700 : :
1701 : : /*
1702 : : * Mark the page as not containing any LP_DEAD items. This is not
1703 : : * certainly true (there might be some that have recently been marked,
1704 : : * but weren't included in our target-item list), but it will almost
1705 : : * always be true and it doesn't seem worth an additional page scan to
1706 : : * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
1707 : : */
1708 : 0 : GistClearPageHasGarbage(page);
1709 : :
1710 : 0 : MarkBufferDirty(buffer);
1711 : :
1712 : : /* XLOG stuff */
1713 [ # # # # : 0 : if (RelationNeedsWAL(rel))
# # # # ]
1714 : 0 : {
1715 : : XLogRecPtr recptr;
1716 : :
1941 akorotkov@postgresql 1717 : 0 : recptr = gistXLogDelete(buffer,
1718 : : deletable, ndeletable,
1719 : : snapshotConflictHorizon,
1720 : : heapRel);
1721 : :
3140 teodor@sigaev.ru 1722 : 0 : PageSetLSN(page, recptr);
1723 : : }
1724 : : else
1725 : 0 : PageSetLSN(page, gistGetFakeLSN(rel));
1726 : :
1727 [ # # ]: 0 : END_CRIT_SECTION();
1728 : : }
1729 : :
1730 : : /*
1731 : : * Note: if we didn't find any LP_DEAD items, then the page's
1732 : : * F_HAS_GARBAGE hint bit is falsely set. We do not bother expending a
1733 : : * separate write to clear it, however. We will clear it when we split
1734 : : * the page.
1735 : : */
1736 : 0 : }
|