Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * ginbtree.c
4 : * page utilities routines for the postgres inverted index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gin/ginbtree.c
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "access/gin_private.h"
18 : #include "access/ginxlog.h"
19 : #include "access/xloginsert.h"
20 : #include "miscadmin.h"
21 : #include "storage/predicate.h"
22 : #include "utils/memutils.h"
23 : #include "utils/rel.h"
24 :
25 : static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
26 : static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
27 : void *insertdata, BlockNumber updateblkno,
28 : Buffer childbuf, GinStatsData *buildStats);
29 : static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
30 : bool freestack, GinStatsData *buildStats);
31 :
32 : /*
33 : * Lock buffer by needed method for search.
34 : */
35 : int
6031 bruce 36 CBC 597663 : ginTraverseLock(Buffer buffer, bool searchMode)
37 : {
38 : Page page;
39 597663 : int access = GIN_SHARE;
40 :
6186 teodor 41 597663 : LockBuffer(buffer, GIN_SHARE);
2545 kgrittn 42 597663 : page = BufferGetPage(buffer);
6031 bruce 43 597663 : if (GinPageIsLeaf(page))
44 : {
2062 peter_e 45 322279 : if (searchMode == false)
46 : {
47 : /* we should relock our page */
6186 teodor 48 320392 : LockBuffer(buffer, GIN_UNLOCK);
49 320392 : LockBuffer(buffer, GIN_EXCLUSIVE);
50 :
51 : /* But root can become non-leaf during relock */
6031 bruce 52 320392 : if (!GinPageIsLeaf(page))
53 : {
54 : /* restore old lock type (very rare) */
6186 teodor 55 UBC 0 : LockBuffer(buffer, GIN_UNLOCK);
56 0 : LockBuffer(buffer, GIN_SHARE);
57 : }
58 : else
6186 teodor 59 CBC 320392 : access = GIN_EXCLUSIVE;
60 : }
61 : }
62 :
63 597663 : return access;
64 : }
65 :
66 : /*
67 : * Descend the tree to the leaf page that contains or would contain the key
68 : * we're searching for. The key should already be filled in 'btree', in
69 : * tree-type specific manner. If btree->fullScan is true, descends to the
70 : * leftmost leaf page.
71 : *
72 : * If 'searchmode' is false, on return stack->buffer is exclusively locked,
73 : * and the stack represents the full path to the root. Otherwise stack->buffer
74 : * is share-locked, and stack->parent is NULL.
75 : *
76 : * If 'rootConflictCheck' is true, tree root is checked for serialization
77 : * conflict.
78 : */
79 : GinBtreeStack *
1564 akorotkov 80 322281 : ginFindLeafPage(GinBtree btree, bool searchMode,
81 : bool rootConflictCheck, Snapshot snapshot)
82 : {
83 : GinBtreeStack *stack;
84 :
3427 heikki.linnakangas 85 322281 : stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
3420 86 322281 : stack->blkno = btree->rootBlkno;
87 322281 : stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
3427 88 322281 : stack->parent = NULL;
89 322281 : stack->predictNumber = 1;
90 :
1564 akorotkov 91 322281 : if (rootConflictCheck)
1167 tmunro 92 24774 : CheckForSerializableConflictIn(btree->index, NULL, btree->rootBlkno);
93 :
94 : for (;;)
6031 bruce 95 275384 : {
96 : Page page;
97 : BlockNumber child;
98 : int access;
99 :
6186 teodor 100 597663 : stack->off = InvalidOffsetNumber;
101 :
2545 kgrittn 102 597663 : page = BufferGetPage(stack->buffer);
103 597663 : TestForOldSnapshot(snapshot, btree->index, page);
104 :
3427 heikki.linnakangas 105 597663 : access = ginTraverseLock(stack->buffer, searchMode);
106 :
107 : /*
108 : * If we're going to modify the tree, finish any incomplete splits we
109 : * encounter on the way.
110 : */
3420 111 597663 : if (!searchMode && GinPageIsIncompleteSplit(page))
3420 heikki.linnakangas 112 UBC 0 : ginFinishSplit(btree, stack, false, NULL);
113 :
114 : /*
115 : * ok, page is correctly locked, we should check to move right ..,
116 : * root never has a right link, so small optimization
117 : */
2062 peter_e 118 CBC 873009 : while (btree->fullScan == false && stack->blkno != btree->rootBlkno &&
4475 tgl 119 275346 : btree->isMoveRight(btree, page))
120 : {
6186 teodor 121 UBC 0 : BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;
122 :
6031 bruce 123 0 : if (rightlink == InvalidBlockNumber)
124 : /* rightmost page */
6186 teodor 125 0 : break;
126 :
3439 heikki.linnakangas 127 0 : stack->buffer = ginStepRight(stack->buffer, btree->index, access);
6186 teodor 128 0 : stack->blkno = rightlink;
2545 kgrittn 129 0 : page = BufferGetPage(stack->buffer);
130 0 : TestForOldSnapshot(snapshot, btree->index, page);
131 :
3420 heikki.linnakangas 132 0 : if (!searchMode && GinPageIsIncompleteSplit(page))
133 0 : ginFinishSplit(btree, stack, false, NULL);
134 : }
135 :
6031 bruce 136 CBC 597663 : if (GinPageIsLeaf(page)) /* we found, return locked page */
6186 teodor 137 322279 : return stack;
138 :
139 : /* now we have correct buffer, try to find child */
140 275384 : child = btree->findChildPage(btree, stack);
141 :
142 275384 : LockBuffer(stack->buffer, GIN_UNLOCK);
6031 bruce 143 275384 : Assert(child != InvalidBlockNumber);
144 275384 : Assert(stack->blkno != child);
145 :
3427 heikki.linnakangas 146 275384 : if (searchMode)
147 : {
148 : /* in search mode we may forget path to leaf */
6186 teodor 149 1297 : stack->blkno = child;
6031 bruce 150 1297 : stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
151 : }
152 : else
153 : {
154 274087 : GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
155 :
6186 teodor 156 274087 : ptr->parent = stack;
157 274087 : stack = ptr;
158 274087 : stack->blkno = child;
159 274087 : stack->buffer = ReadBuffer(btree->index, stack->blkno);
160 274087 : stack->predictNumber = 1;
161 : }
162 : }
163 : }
164 :
165 : /*
166 : * Step right from current page.
167 : *
168 : * The next page is locked first, before releasing the current page. This is
169 : * crucial to protect from concurrent page deletion (see comment in
170 : * ginDeletePage).
171 : */
172 : Buffer
3439 heikki.linnakangas 173 1768 : ginStepRight(Buffer buffer, Relation index, int lockmode)
174 : {
175 : Buffer nextbuffer;
2545 kgrittn 176 1768 : Page page = BufferGetPage(buffer);
3439 heikki.linnakangas 177 1768 : bool isLeaf = GinPageIsLeaf(page);
178 1768 : bool isData = GinPageIsData(page);
3420 179 1768 : BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
180 :
3439 181 1768 : nextbuffer = ReadBuffer(index, blkno);
182 1768 : LockBuffer(nextbuffer, lockmode);
183 1768 : UnlockReleaseBuffer(buffer);
184 :
185 : /* Sanity check that the page we stepped to is of similar kind. */
2545 kgrittn 186 1768 : page = BufferGetPage(nextbuffer);
3439 heikki.linnakangas 187 1768 : if (isLeaf != GinPageIsLeaf(page) || isData != GinPageIsData(page))
3439 heikki.linnakangas 188 UBC 0 : elog(ERROR, "right sibling of GIN page is of different type");
189 :
3439 heikki.linnakangas 190 CBC 1768 : return nextbuffer;
191 : }
192 :
193 : void
6031 bruce 194 322273 : freeGinBtreeStack(GinBtreeStack *stack)
195 : {
196 916890 : while (stack)
197 : {
198 594617 : GinBtreeStack *tmp = stack->parent;
199 :
200 594617 : if (stack->buffer != InvalidBuffer)
6186 teodor 201 594617 : ReleaseBuffer(stack->buffer);
202 :
6031 bruce 203 594617 : pfree(stack);
6186 teodor 204 594617 : stack = tmp;
205 : }
206 322273 : }
207 :
208 : /*
209 : * Try to find parent for current stack position. Returns correct parent and
210 : * child's offset in stack->parent. The root page is never released, to
211 : * prevent conflict with vacuum process.
212 : */
213 : static void
3420 heikki.linnakangas 214 UBC 0 : ginFindParents(GinBtree btree, GinBtreeStack *stack)
215 : {
216 : Page page;
217 : Buffer buffer;
218 : BlockNumber blkno,
219 : leftmostBlkno;
220 : OffsetNumber offset;
221 : GinBtreeStack *root;
222 : GinBtreeStack *ptr;
223 :
224 : /*
225 : * Unwind the stack all the way up to the root, leaving only the root
226 : * item.
227 : *
228 : * Be careful not to release the pin on the root page! The pin on root
229 : * page is required to lock out concurrent vacuums on the tree.
230 : */
231 0 : root = stack->parent;
232 0 : while (root->parent)
233 : {
234 0 : ReleaseBuffer(root->buffer);
235 0 : root = root->parent;
236 : }
237 :
238 0 : Assert(root->blkno == btree->rootBlkno);
239 0 : Assert(BufferGetBlockNumber(root->buffer) == btree->rootBlkno);
6186 teodor 240 0 : root->off = InvalidOffsetNumber;
241 :
3420 heikki.linnakangas 242 0 : blkno = root->blkno;
243 0 : buffer = root->buffer;
244 :
245 0 : ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
246 :
247 : for (;;)
248 : {
6186 teodor 249 0 : LockBuffer(buffer, GIN_EXCLUSIVE);
2545 kgrittn 250 0 : page = BufferGetPage(buffer);
6031 bruce 251 0 : if (GinPageIsLeaf(page))
6186 teodor 252 0 : elog(ERROR, "Lost path");
253 :
3420 heikki.linnakangas 254 0 : if (GinPageIsIncompleteSplit(page))
255 : {
256 0 : Assert(blkno != btree->rootBlkno);
257 0 : ptr->blkno = blkno;
258 0 : ptr->buffer = buffer;
259 :
260 : /*
261 : * parent may be wrong, but if so, the ginFinishSplit call will
262 : * recurse to call ginFindParents again to fix it.
263 : */
264 0 : ptr->parent = root;
265 0 : ptr->off = InvalidOffsetNumber;
266 :
267 0 : ginFinishSplit(btree, ptr, false, NULL);
268 : }
269 :
3427 270 0 : leftmostBlkno = btree->getLeftMostChild(btree, page);
271 :
6031 bruce 272 0 : while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber)
273 : {
6186 teodor 274 0 : blkno = GinPageGetOpaque(page)->rightlink;
6031 bruce 275 0 : if (blkno == InvalidBlockNumber)
276 : {
3439 heikki.linnakangas 277 0 : UnlockReleaseBuffer(buffer);
6186 teodor 278 0 : break;
279 : }
3439 heikki.linnakangas 280 0 : buffer = ginStepRight(buffer, btree->index, GIN_EXCLUSIVE);
2545 kgrittn 281 0 : page = BufferGetPage(buffer);
282 :
283 : /* finish any incomplete splits, as above */
3420 heikki.linnakangas 284 0 : if (GinPageIsIncompleteSplit(page))
285 : {
286 0 : Assert(blkno != btree->rootBlkno);
287 0 : ptr->blkno = blkno;
288 0 : ptr->buffer = buffer;
289 0 : ptr->parent = root;
290 0 : ptr->off = InvalidOffsetNumber;
291 :
292 0 : ginFinishSplit(btree, ptr, false, NULL);
293 : }
294 : }
295 :
6031 bruce 296 0 : if (blkno != InvalidBlockNumber)
297 : {
6186 teodor 298 0 : ptr->blkno = blkno;
299 0 : ptr->buffer = buffer;
3420 heikki.linnakangas 300 0 : ptr->parent = root; /* it may be wrong, but in next call we will
301 : * correct */
6162 teodor 302 0 : ptr->off = offset;
6186 303 0 : stack->parent = ptr;
304 0 : return;
305 : }
306 :
307 : /* Descend down to next level */
308 0 : blkno = leftmostBlkno;
3420 heikki.linnakangas 309 0 : buffer = ReadBuffer(btree->index, blkno);
310 : }
311 : }
312 :
313 : /*
314 : * Insert a new item to a page.
315 : *
316 : * Returns true if the insertion was finished. On false, the page was split and
317 : * the parent needs to be updated. (A root split returns true as it doesn't
318 : * need any further action by the caller to complete.)
319 : *
320 : * When inserting a downlink to an internal page, 'childbuf' contains the
321 : * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
322 : * atomically with the insert. Also, the existing item at offset stack->off
323 : * in the target page is updated to point to updateblkno.
324 : *
325 : * stack->buffer is locked on entry, and is kept locked.
326 : * Likewise for childbuf, if given.
327 : */
328 : static bool
3420 heikki.linnakangas 329 CBC 297427 : ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
330 : void *insertdata, BlockNumber updateblkno,
331 : Buffer childbuf, GinStatsData *buildStats)
332 : {
2545 kgrittn 333 297427 : Page page = BufferGetPage(stack->buffer);
334 : bool result;
335 : GinPlaceToPageRC rc;
3420 heikki.linnakangas 336 297427 : uint16 xlflags = 0;
337 297427 : Page childpage = NULL;
3260 bruce 338 297427 : Page newlpage = NULL,
339 297427 : newrpage = NULL;
2545 tgl 340 297427 : void *ptp_workspace = NULL;
341 : MemoryContext tmpCxt;
342 : MemoryContext oldCxt;
343 :
344 : /*
345 : * We do all the work of this function and its subfunctions in a temporary
346 : * memory context. This avoids leakages and simplifies APIs, since some
347 : * subfunctions allocate storage that has to survive until we've finished
348 : * the WAL insertion.
349 : */
350 297427 : tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
351 : "ginPlaceToPage temporary context",
352 : ALLOCSET_DEFAULT_SIZES);
353 297427 : oldCxt = MemoryContextSwitchTo(tmpCxt);
354 :
3420 heikki.linnakangas 355 297427 : if (GinPageIsData(page))
356 24795 : xlflags |= GIN_INSERT_ISDATA;
357 297427 : if (GinPageIsLeaf(page))
358 : {
359 295690 : xlflags |= GIN_INSERT_ISLEAF;
360 295690 : Assert(!BufferIsValid(childbuf));
361 295690 : Assert(updateblkno == InvalidBlockNumber);
362 : }
363 : else
364 : {
365 1737 : Assert(BufferIsValid(childbuf));
366 1737 : Assert(updateblkno != InvalidBlockNumber);
2545 kgrittn 367 1737 : childpage = BufferGetPage(childbuf);
368 : }
369 :
370 : /*
371 : * See if the incoming tuple will fit on the page. beginPlaceToPage will
372 : * decide if the page needs to be split, and will compute the split
373 : * contents if so. See comments for beginPlaceToPage and execPlaceToPage
374 : * functions for more details of the API here.
375 : */
tgl 376 297427 : rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
377 : insertdata, updateblkno,
378 : &ptp_workspace,
379 : &newlpage, &newrpage);
380 :
381 297427 : if (rc == GPTP_NO_WORK)
382 : {
383 : /* Nothing to do */
2545 tgl 384 UBC 0 : result = true;
385 : }
2545 tgl 386 CBC 297427 : else if (rc == GPTP_INSERT)
387 : {
388 : /* It will fit, perform the insertion */
389 295578 : START_CRIT_SECTION();
390 :
1467 heikki.linnakangas 391 295578 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
392 : {
2545 tgl 393 101033 : XLogBeginInsert();
394 101033 : XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
395 101033 : if (BufferIsValid(childbuf))
396 417 : XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
397 : }
398 :
399 : /* Perform the page update, and register any extra WAL data */
400 295578 : btree->execPlaceToPage(btree, stack->buffer, stack,
401 : insertdata, updateblkno, ptp_workspace);
402 :
3427 heikki.linnakangas 403 295578 : MarkBufferDirty(stack->buffer);
404 :
405 : /* An insert to an internal page finishes the split of the child. */
2545 tgl 406 295578 : if (BufferIsValid(childbuf))
407 : {
3420 heikki.linnakangas 408 1737 : GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
409 1737 : MarkBufferDirty(childbuf);
410 : }
411 :
1467 412 295578 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
413 : {
414 : XLogRecPtr recptr;
415 : ginxlogInsert xlrec;
416 : BlockIdData childblknos[2];
417 :
3420 418 101033 : xlrec.flags = xlflags;
419 :
3062 420 101033 : XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
421 :
422 : /*
423 : * Log information about child if this was an insertion of a
424 : * downlink.
425 : */
2545 tgl 426 101033 : if (BufferIsValid(childbuf))
427 : {
3420 heikki.linnakangas 428 417 : BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
429 417 : BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
3062 430 417 : XLogRegisterData((char *) childblknos,
431 : sizeof(BlockIdData) * 2);
432 : }
433 :
434 101033 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
3427 435 101033 : PageSetLSN(page, recptr);
2545 tgl 436 101033 : if (BufferIsValid(childbuf))
3420 heikki.linnakangas 437 417 : PageSetLSN(childpage, recptr);
438 : }
439 :
3427 440 295578 : END_CRIT_SECTION();
441 :
442 : /* Insertion is complete. */
2545 tgl 443 295578 : result = true;
444 : }
445 1849 : else if (rc == GPTP_SPLIT)
446 : {
447 : /*
448 : * Didn't fit, need to split. The split has been computed in newlpage
449 : * and newrpage, which are pointers to palloc'd pages, not associated
450 : * with buffers. stack->buffer is not touched yet.
451 : */
452 : Buffer rbuffer;
453 : BlockNumber savedRightLink;
454 : ginxlogSplit data;
3420 heikki.linnakangas 455 1849 : Buffer lbuffer = InvalidBuffer;
456 1849 : Page newrootpg = NULL;
457 :
458 : /* Get a new index page to become the right page */
3427 459 1849 : rbuffer = GinNewBuffer(btree->index);
460 :
461 : /* During index build, count the new page */
462 1849 : if (buildStats)
463 : {
464 551 : if (btree->isData)
465 46 : buildStats->nDataPages++;
466 : else
467 505 : buildStats->nEntryPages++;
468 : }
469 :
3420 470 1849 : savedRightLink = GinPageGetOpaque(page)->rightlink;
471 :
472 : /* Begin setting up WAL record */
277 rhaas 473 GNC 1849 : data.locator = btree->index->rd_locator;
3420 heikki.linnakangas 474 CBC 1849 : data.flags = xlflags;
2545 tgl 475 1849 : if (BufferIsValid(childbuf))
476 : {
3420 heikki.linnakangas 477 UBC 0 : data.leftChildBlkno = BufferGetBlockNumber(childbuf);
478 0 : data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
479 : }
480 : else
3420 heikki.linnakangas 481 CBC 1849 : data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
482 :
483 1849 : if (stack->parent == NULL)
484 : {
485 : /*
486 : * splitting the root, so we need to allocate new left page and
487 : * place pointers to left and right page on root page.
488 : */
489 112 : lbuffer = GinNewBuffer(btree->index);
490 :
491 : /* During index build, count the new left page */
492 112 : if (buildStats)
493 : {
494 92 : if (btree->isData)
495 38 : buildStats->nDataPages++;
496 : else
497 54 : buildStats->nEntryPages++;
498 : }
499 :
3062 500 112 : data.rrlink = InvalidBlockNumber;
3420 501 112 : data.flags |= GIN_SPLIT_ROOT;
502 :
3364 503 112 : GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
3427 504 112 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
505 :
506 : /*
507 : * Construct a new root page containing downlinks to the new left
508 : * and right pages. (Do this in a temporary copy rather than
509 : * overwriting the original page directly, since we're not in the
510 : * critical section yet.)
511 : */
3364 512 112 : newrootpg = PageGetTempPage(newrpage);
513 112 : GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
514 :
3420 515 112 : btree->fillRoot(btree, newrootpg,
516 : BufferGetBlockNumber(lbuffer), newlpage,
517 : BufferGetBlockNumber(rbuffer), newrpage);
518 :
1836 teodor 519 112 : if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
520 : {
521 :
522 112 : PredicateLockPageSplit(btree->index,
523 : BufferGetBlockNumber(stack->buffer),
524 : BufferGetBlockNumber(lbuffer));
525 :
526 112 : PredicateLockPageSplit(btree->index,
527 : BufferGetBlockNumber(stack->buffer),
528 : BufferGetBlockNumber(rbuffer));
529 : }
530 : }
531 : else
532 : {
533 : /* splitting a non-root page */
3420 heikki.linnakangas 534 1737 : data.rrlink = savedRightLink;
535 :
3364 536 1737 : GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
3420 537 1737 : GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
3427 538 1737 : GinPageGetOpaque(newlpage)->rightlink = BufferGetBlockNumber(rbuffer);
539 :
1836 teodor 540 1737 : if (GinPageIsLeaf(BufferGetPage(stack->buffer)))
541 : {
542 :
543 1737 : PredicateLockPageSplit(btree->index,
544 : BufferGetBlockNumber(stack->buffer),
545 : BufferGetBlockNumber(rbuffer));
546 : }
547 : }
548 :
549 : /*
550 : * OK, we have the new contents of the left page in a temporary copy
551 : * now (newlpage), and likewise for the new contents of the
552 : * newly-allocated right block. The original page is still unchanged.
553 : *
554 : * If this is a root split, we also have a temporary page containing
555 : * the new contents of the root.
556 : */
557 :
3420 heikki.linnakangas 558 1849 : START_CRIT_SECTION();
559 :
560 1849 : MarkBufferDirty(rbuffer);
3364 561 1849 : MarkBufferDirty(stack->buffer);
562 :
563 : /*
564 : * Restore the temporary copies over the real buffers.
565 : */
3420 566 1849 : if (stack->parent == NULL)
567 : {
568 : /* Splitting the root, three pages to update */
569 112 : MarkBufferDirty(lbuffer);
2545 tgl 570 112 : memcpy(page, newrootpg, BLCKSZ);
kgrittn 571 112 : memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
572 112 : memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
573 : }
574 : else
575 : {
576 : /* Normal split, only two pages to update */
tgl 577 1737 : memcpy(page, newlpage, BLCKSZ);
kgrittn 578 1737 : memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
579 : }
580 :
581 : /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
tgl 582 1849 : if (BufferIsValid(childbuf))
583 : {
2545 tgl 584 UBC 0 : GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
585 0 : MarkBufferDirty(childbuf);
586 : }
587 :
588 : /* write WAL record */
1467 heikki.linnakangas 589 CBC 1849 : if (RelationNeedsWAL(btree->index) && !btree->isBuild)
590 : {
591 : XLogRecPtr recptr;
592 :
2842 593 428 : XLogBeginInsert();
594 :
595 : /*
596 : * We just take full page images of all the split pages. Splits
597 : * are uncommon enough that it's not worth complicating the code
598 : * to be more efficient.
599 : */
3062 600 428 : if (stack->parent == NULL)
601 : {
602 11 : XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
603 11 : XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
604 11 : XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
605 : }
606 : else
607 : {
608 417 : XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
609 417 : XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
610 : }
611 428 : if (BufferIsValid(childbuf))
2545 tgl 612 UBC 0 : XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);
613 :
3062 heikki.linnakangas 614 CBC 428 : XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
615 :
616 428 : recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
617 :
2545 tgl 618 428 : PageSetLSN(page, recptr);
kgrittn 619 428 : PageSetLSN(BufferGetPage(rbuffer), recptr);
3420 heikki.linnakangas 620 428 : if (stack->parent == NULL)
2545 kgrittn 621 11 : PageSetLSN(BufferGetPage(lbuffer), recptr);
3295 heikki.linnakangas 622 428 : if (BufferIsValid(childbuf))
3295 heikki.linnakangas 623 UBC 0 : PageSetLSN(childpage, recptr);
624 : }
3420 heikki.linnakangas 625 CBC 1849 : END_CRIT_SECTION();
626 :
627 : /*
628 : * We can release the locks/pins on the new pages now, but keep
629 : * stack->buffer locked. childbuf doesn't get unlocked either.
630 : */
631 1849 : UnlockReleaseBuffer(rbuffer);
632 1849 : if (stack->parent == NULL)
633 112 : UnlockReleaseBuffer(lbuffer);
634 :
635 : /*
636 : * If we split the root, we're done. Otherwise the split is not
637 : * complete until the downlink for the new page has been inserted to
638 : * the parent.
639 : */
2545 tgl 640 1849 : result = (stack->parent == NULL);
641 : }
642 : else
643 : {
1343 michael 644 UBC 0 : elog(ERROR, "invalid return code from GIN beginPlaceToPage method: %d", rc);
645 : result = false; /* keep compiler quiet */
646 : }
647 :
648 : /* Clean up temp context */
2545 tgl 649 CBC 297427 : MemoryContextSwitchTo(oldCxt);
650 297427 : MemoryContextDelete(tmpCxt);
651 :
652 297427 : return result;
653 : }
654 :
655 : /*
656 : * Finish a split by inserting the downlink for the new page to parent.
657 : *
658 : * On entry, stack->buffer is exclusively locked.
659 : *
660 : * If freestack is true, all the buffers are released and unlocked as we
661 : * crawl up the tree, and 'stack' is freed. Otherwise stack->buffer is kept
662 : * locked, and stack is unmodified, except for possibly moving right to find
663 : * the correct parent of page.
664 : */
665 : static void
3420 heikki.linnakangas 666 1737 : ginFinishSplit(GinBtree btree, GinBtreeStack *stack, bool freestack,
667 : GinStatsData *buildStats)
668 : {
669 : Page page;
670 : bool done;
671 1737 : bool first = true;
672 :
673 : /*
674 : * freestack == false when we encounter an incompletely split page during
675 : * a scan, while freestack == true is used in the normal scenario that a
676 : * split is finished right after the initial insert.
677 : */
678 1737 : if (!freestack)
3420 heikki.linnakangas 679 UBC 0 : elog(DEBUG1, "finishing incomplete split of block %u in gin index \"%s\"",
680 : stack->blkno, RelationGetRelationName(btree->index));
681 :
682 : /* this loop crawls up the stack until the insertion is complete */
683 : do
684 : {
3420 heikki.linnakangas 685 CBC 1737 : GinBtreeStack *parent = stack->parent;
686 : void *insertdata;
687 : BlockNumber updateblkno;
688 :
689 : /* search parent to lock */
6186 teodor 690 1737 : LockBuffer(parent->buffer, GIN_EXCLUSIVE);
691 :
692 : /*
693 : * If the parent page was incompletely split, finish that split first,
694 : * then continue with the current one.
695 : *
696 : * Note: we have to finish *all* incomplete splits we encounter, even
697 : * if we have to move right. Otherwise we might choose as the target a
698 : * page that has no downlink in the parent, and splitting it further
699 : * would fail.
700 : */
2545 kgrittn 701 1737 : if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
3420 heikki.linnakangas 702 UBC 0 : ginFinishSplit(btree, parent, false, buildStats);
703 :
704 : /* move right if it's needed */
2545 kgrittn 705 CBC 1737 : page = BufferGetPage(parent->buffer);
6031 bruce 706 1737 : while ((parent->off = btree->findChildPtr(btree, page, stack->blkno, parent->off)) == InvalidOffsetNumber)
707 : {
3420 heikki.linnakangas 708 UBC 0 : if (GinPageRightMost(page))
709 : {
710 : /*
711 : * rightmost page, but we don't find parent, we should use
712 : * plain search...
713 : */
3439 714 0 : LockBuffer(parent->buffer, GIN_UNLOCK);
3420 715 0 : ginFindParents(btree, stack);
6031 bruce 716 0 : parent = stack->parent;
3922 tgl 717 0 : Assert(parent != NULL);
6186 teodor 718 0 : break;
719 : }
720 :
3439 heikki.linnakangas 721 0 : parent->buffer = ginStepRight(parent->buffer, btree->index, GIN_EXCLUSIVE);
3420 722 0 : parent->blkno = BufferGetBlockNumber(parent->buffer);
2545 kgrittn 723 0 : page = BufferGetPage(parent->buffer);
724 :
725 0 : if (GinPageIsIncompleteSplit(BufferGetPage(parent->buffer)))
3420 heikki.linnakangas 726 0 : ginFinishSplit(btree, parent, false, buildStats);
727 : }
728 :
729 : /* insert the downlink */
3420 heikki.linnakangas 730 CBC 1737 : insertdata = btree->prepareDownlink(btree, stack->buffer);
2545 kgrittn 731 1737 : updateblkno = GinPageGetOpaque(BufferGetPage(stack->buffer))->rightlink;
3420 heikki.linnakangas 732 1737 : done = ginPlaceToPage(btree, parent,
733 : insertdata, updateblkno,
734 : stack->buffer, buildStats);
735 1737 : pfree(insertdata);
736 :
737 : /*
738 : * If the caller requested to free the stack, unlock and release the
739 : * child buffer now. Otherwise keep it pinned and locked, but if we
740 : * have to recurse up the tree, we can unlock the upper pages, only
741 : * keeping the page at the bottom of the stack locked.
742 : */
743 1737 : if (!first || freestack)
744 1737 : LockBuffer(stack->buffer, GIN_UNLOCK);
745 1737 : if (freestack)
746 : {
747 1737 : ReleaseBuffer(stack->buffer);
748 1737 : pfree(stack);
749 : }
750 1737 : stack = parent;
751 :
752 1737 : first = false;
753 1737 : } while (!done);
754 :
755 : /* unlock the parent */
756 1737 : LockBuffer(stack->buffer, GIN_UNLOCK);
757 :
758 1737 : if (freestack)
759 1737 : freeGinBtreeStack(stack);
760 1737 : }
761 :
762 : /*
763 : * Insert a value to tree described by stack.
764 : *
765 : * The value to be inserted is given in 'insertdata'. Its format depends
766 : * on whether this is an entry or data tree, ginInsertValue just passes it
767 : * through to the tree-specific callback function.
768 : *
769 : * During an index build, buildStats is non-null and the counters it contains
770 : * are incremented as needed.
771 : *
772 : * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
773 : */
774 : void
775 295690 : ginInsertValue(GinBtree btree, GinBtreeStack *stack, void *insertdata,
776 : GinStatsData *buildStats)
777 : {
778 : bool done;
779 :
780 : /* If the leaf page was incompletely split, finish the split first */
2545 kgrittn 781 295690 : if (GinPageIsIncompleteSplit(BufferGetPage(stack->buffer)))
3420 heikki.linnakangas 782 UBC 0 : ginFinishSplit(btree, stack, false, buildStats);
783 :
3420 heikki.linnakangas 784 CBC 295690 : done = ginPlaceToPage(btree, stack,
785 : insertdata, InvalidBlockNumber,
786 : InvalidBuffer, buildStats);
787 295690 : if (done)
788 : {
789 293953 : LockBuffer(stack->buffer, GIN_UNLOCK);
790 293953 : freeGinBtreeStack(stack);
791 : }
792 : else
793 1737 : ginFinishSplit(btree, stack, true, buildStats);
6186 teodor 794 295690 : }
|