Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistbuildbuffers.c
4 : * node buffer management functions for GiST buffering build algorithm.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gistbuildbuffers.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include "access/genam.h"
18 : #include "access/gist_private.h"
19 : #include "catalog/index.h"
20 : #include "miscadmin.h"
21 : #include "storage/buffile.h"
22 : #include "storage/bufmgr.h"
23 : #include "utils/memutils.h"
24 : #include "utils/rel.h"
25 :
26 : static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
27 : static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
28 : GISTNodeBuffer *nodeBuffer);
29 : static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
30 : GISTNodeBuffer *nodeBuffer);
31 : static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
32 : GISTNodeBuffer *nodeBuffer);
33 : static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
34 : IndexTuple itup);
35 : static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
36 : IndexTuple *itup);
37 : static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
38 : static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
39 :
40 : static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
41 : static void WriteTempFileBlock(BufFile *file, long blknum, const void *ptr);
42 :
43 :
44 : /*
45 : * Initialize GiST build buffers.
46 : */
47 : GISTBuildBuffers *
4231 heikki.linnakangas 48 CBC 3 : gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
49 : {
50 : GISTBuildBuffers *gfbb;
51 : HASHCTL hashCtl;
52 :
53 3 : gfbb = palloc(sizeof(GISTBuildBuffers));
54 3 : gfbb->pagesPerBuffer = pagesPerBuffer;
55 3 : gfbb->levelStep = levelStep;
56 :
57 : /*
58 : * Create a temporary file to hold buffer pages that are swapped out of
59 : * memory.
60 : */
3966 61 3 : gfbb->pfile = BufFileCreateTemp(false);
4231 62 3 : gfbb->nFileBlocks = 0;
63 :
64 : /* Initialize free page management. */
65 3 : gfbb->nFreeBlocks = 0;
66 3 : gfbb->freeBlocksLen = 32;
67 3 : gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
68 :
69 : /*
70 : * Current memory context will be used for all in-memory data structures
71 : * of buffers which are persistent during buffering build.
72 : */
73 3 : gfbb->context = CurrentMemoryContext;
74 :
75 : /*
76 : * nodeBuffersTab hash is association between index blocks and it's
77 : * buffers.
78 : */
79 3 : hashCtl.keysize = sizeof(BlockNumber);
80 3 : hashCtl.entrysize = sizeof(GISTNodeBuffer);
81 3 : hashCtl.hcxt = CurrentMemoryContext;
82 3 : gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
83 : 1024,
84 : &hashCtl,
85 : HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
86 :
87 3 : gfbb->bufferEmptyingQueue = NIL;
88 :
89 : /*
90 : * Per-level node buffers lists for final buffers emptying process. Node
91 : * buffers are inserted here when they are created.
92 : */
93 3 : gfbb->buffersOnLevelsLen = 1;
94 6 : gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
95 3 : gfbb->buffersOnLevelsLen);
96 3 : gfbb->buffersOnLevels[0] = NIL;
97 :
98 : /*
99 : * Block numbers of node buffers which last pages are currently loaded
100 : * into main memory.
101 : */
102 3 : gfbb->loadedBuffersLen = 32;
103 3 : gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
104 : sizeof(GISTNodeBuffer *));
105 3 : gfbb->loadedBuffersCount = 0;
106 :
3966 107 3 : gfbb->rootlevel = maxLevel;
108 :
4231 109 3 : return gfbb;
110 : }
111 :
112 : /*
113 : * Returns a node buffer for given block. The buffer is created if it
114 : * doesn't exist yet.
115 : */
116 : GISTNodeBuffer *
117 17178 : gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
118 : BlockNumber nodeBlocknum, int level)
119 : {
120 : GISTNodeBuffer *nodeBuffer;
121 : bool found;
122 :
123 : /* Find node buffer in hash table */
124 17178 : nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
125 : &nodeBlocknum,
126 : HASH_ENTER,
127 : &found);
128 17178 : if (!found)
129 : {
130 : /*
131 : * Node buffer wasn't found. Initialize the new buffer as empty.
132 : */
133 9 : MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
134 :
135 : /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */
136 9 : nodeBuffer->blocksCount = 0;
3978 137 9 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
138 9 : nodeBuffer->pageBuffer = NULL;
4231 139 9 : nodeBuffer->queuedForEmptying = false;
1369 michael 140 9 : nodeBuffer->isTemp = false;
3966 heikki.linnakangas 141 9 : nodeBuffer->level = level;
142 :
143 : /*
144 : * Add this buffer to the list of buffers on this level. Enlarge
145 : * buffersOnLevels array if needed.
146 : */
4231 147 9 : if (level >= gfbb->buffersOnLevelsLen)
148 : {
149 : int i;
150 :
151 3 : gfbb->buffersOnLevels =
152 3 : (List **) repalloc(gfbb->buffersOnLevels,
153 3 : (level + 1) * sizeof(List *));
154 :
155 : /* initialize the enlarged portion */
156 6 : for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
157 3 : gfbb->buffersOnLevels[i] = NIL;
158 3 : gfbb->buffersOnLevelsLen = level + 1;
159 : }
160 :
161 : /*
162 : * Prepend the new buffer to the list of buffers on this level. It's
163 : * not arbitrary that the new buffer is put to the beginning of the
164 : * list: in the final emptying phase we loop through all buffers at
165 : * each level, and flush them. If a page is split during the emptying,
166 : * it's more efficient to flush the new splitted pages first, before
167 : * moving on to pre-existing pages on the level. The buffers just
168 : * created during the page split are likely still in cache, so
169 : * flushing them immediately is more efficient than putting them to
170 : * the end of the queue.
171 : */
172 18 : gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
173 9 : gfbb->buffersOnLevels[level]);
174 :
175 9 : MemoryContextSwitchTo(oldcxt);
176 : }
177 :
178 17178 : return nodeBuffer;
179 : }
180 :
181 : /*
182 : * Allocate memory for a buffer page.
183 : */
184 : static GISTNodeBufferPage *
185 24 : gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
186 : {
187 : GISTNodeBufferPage *pageBuffer;
188 :
1369 michael 189 24 : pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context,
190 : BLCKSZ);
4231 heikki.linnakangas 191 24 : pageBuffer->prev = InvalidBlockNumber;
192 :
193 : /* Set page free space */
194 24 : PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
195 24 : return pageBuffer;
196 : }
197 :
198 : /*
199 : * Add specified buffer into loadedBuffers array.
200 : */
201 : static void
202 24 : gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
203 : {
204 : /* Never add a temporary buffer to the array */
3978 205 24 : if (nodeBuffer->isTemp)
3978 heikki.linnakangas 206 UBC 0 : return;
207 :
208 : /* Enlarge the array if needed */
4231 heikki.linnakangas 209 CBC 24 : if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
210 : {
4231 heikki.linnakangas 211 UBC 0 : gfbb->loadedBuffersLen *= 2;
212 0 : gfbb->loadedBuffers = (GISTNodeBuffer **)
213 0 : repalloc(gfbb->loadedBuffers,
214 0 : gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
215 : }
216 :
4231 heikki.linnakangas 217 CBC 24 : gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
218 24 : gfbb->loadedBuffersCount++;
219 : }
220 :
221 : /*
222 : * Load last page of node buffer into main memory.
223 : */
224 : static void
225 9 : gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
226 : {
227 : /* Check if we really should load something */
228 9 : if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
229 : {
230 : /* Allocate memory for page */
231 9 : nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
232 :
233 : /* Read block from temporary file */
234 9 : ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
235 9 : nodeBuffer->pageBuffer);
236 :
237 : /* Mark file block as free */
238 9 : gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
239 :
240 : /* Mark node buffer as loaded */
241 9 : gistAddLoadedBuffer(gfbb, nodeBuffer);
242 9 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
243 : }
244 9 : }
245 :
246 : /*
247 : * Write last page of node buffer to the disk.
248 : */
249 : static void
250 21 : gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
251 : {
252 : /* Check if we have something to write */
253 21 : if (nodeBuffer->pageBuffer)
254 : {
255 : BlockNumber blkno;
256 :
257 : /* Get free file block */
258 9 : blkno = gistBuffersGetFreeBlock(gfbb);
259 :
260 : /* Write block to the temporary file */
261 9 : WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
262 :
263 : /* Free memory of that page */
264 9 : pfree(nodeBuffer->pageBuffer);
265 9 : nodeBuffer->pageBuffer = NULL;
266 :
267 : /* Save block number */
268 9 : nodeBuffer->pageBlocknum = blkno;
269 : }
270 21 : }
271 :
272 : /*
273 : * Write last pages of all node buffers to the disk.
274 : */
275 : void
276 9 : gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
277 : {
278 : int i;
279 :
280 : /* Unload all the buffers that have a page loaded in memory. */
281 30 : for (i = 0; i < gfbb->loadedBuffersCount; i++)
282 21 : gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
283 :
284 : /* Now there are no node buffers with loaded last page */
285 9 : gfbb->loadedBuffersCount = 0;
286 9 : }
287 :
288 : /*
289 : * Add index tuple to buffer page.
290 : */
291 : static void
292 32037 : gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
293 : {
294 32037 : Size itupsz = IndexTupleSize(itup);
295 : char *ptr;
296 :
297 : /* There should be enough of space. */
298 32037 : Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
299 :
300 : /* Reduce free space value of page to reserve a spot for the tuple. */
301 32037 : PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
302 :
303 : /* Get pointer to the spot we reserved (ie. end of free space). */
304 32037 : ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
305 32037 : + PAGE_FREE_SPACE(pageBuffer);
306 :
307 : /* Copy the index tuple there. */
308 32037 : memcpy(ptr, itup, itupsz);
309 32037 : }
310 :
311 : /*
312 : * Get last item from buffer page and remove it from page.
313 : */
314 : static void
315 32037 : gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
316 : {
317 : IndexTuple ptr;
318 : Size itupsz;
319 :
320 32037 : Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */
321 :
322 : /* Get pointer to last index tuple */
323 32037 : ptr = (IndexTuple) ((char *) pageBuffer
324 : + BUFFER_PAGE_DATA_OFFSET
325 32037 : + PAGE_FREE_SPACE(pageBuffer));
326 32037 : itupsz = IndexTupleSize(ptr);
327 :
328 : /* Make a copy of the tuple */
329 32037 : *itup = (IndexTuple) palloc(itupsz);
330 32037 : memcpy(*itup, ptr, itupsz);
331 :
332 : /* Mark the space used by the tuple as free */
333 32037 : PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
334 32037 : }
335 :
336 : /*
337 : * Push an index tuple to node buffer.
338 : */
339 : void
340 32037 : gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
341 : IndexTuple itup)
342 : {
343 : /*
344 : * Most part of memory operations will be in buffering build persistent
345 : * context. So, let's switch to it.
346 : */
347 32037 : MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
348 :
349 : /*
350 : * If the buffer is currently empty, create the first page.
351 : */
352 32037 : if (nodeBuffer->blocksCount == 0)
353 : {
354 15 : nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
355 15 : nodeBuffer->blocksCount = 1;
356 15 : gistAddLoadedBuffer(gfbb, nodeBuffer);
357 : }
358 :
359 : /* Load last page of node buffer if it wasn't in memory already */
360 32037 : if (!nodeBuffer->pageBuffer)
4231 heikki.linnakangas 361 UBC 0 : gistLoadNodeBuffer(gfbb, nodeBuffer);
362 :
363 : /*
364 : * Check if there is enough space on the last page for the tuple.
365 : */
4231 heikki.linnakangas 366 CBC 32037 : if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
367 : {
368 : /*
369 : * Nope. Swap previous block to disk and allocate a new one.
370 : */
371 : BlockNumber blkno;
372 :
373 : /* Write filled page to the disk */
374 153 : blkno = gistBuffersGetFreeBlock(gfbb);
375 153 : WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
376 :
377 : /*
378 : * Reset the in-memory page as empty, and link the previous block to
379 : * the new page by storing its block number in the prev-link.
380 : */
381 153 : PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
382 : BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
383 153 : nodeBuffer->pageBuffer->prev = blkno;
384 :
385 : /* We've just added one more page */
386 153 : nodeBuffer->blocksCount++;
387 : }
388 :
389 32037 : gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
390 :
391 : /*
392 : * If the buffer just overflowed, add it to the emptying queue.
393 : */
394 32037 : if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
395 : {
4231 heikki.linnakangas 396 UBC 0 : gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
397 : gfbb->bufferEmptyingQueue);
398 0 : nodeBuffer->queuedForEmptying = true;
399 : }
400 :
401 : /* Restore memory context */
4231 heikki.linnakangas 402 CBC 32037 : MemoryContextSwitchTo(oldcxt);
403 32037 : }
404 :
405 : /*
406 : * Removes one index tuple from node buffer. Returns true if success and false
407 : * if node buffer is empty.
408 : */
409 : bool
410 32052 : gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
411 : IndexTuple *itup)
412 : {
413 : /*
414 : * If node buffer is empty then return false.
415 : */
416 32052 : if (nodeBuffer->blocksCount <= 0)
417 15 : return false;
418 :
419 : /* Load last page of node buffer if needed */
420 32037 : if (!nodeBuffer->pageBuffer)
421 9 : gistLoadNodeBuffer(gfbb, nodeBuffer);
422 :
423 : /*
424 : * Get index tuple from last non-empty page.
425 : */
426 32037 : gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
427 :
428 : /*
429 : * If we just removed the last tuple from the page, fetch previous page on
430 : * this node buffer (if any).
431 : */
432 32037 : if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
433 : {
434 : BlockNumber prevblkno;
435 :
436 : /*
437 : * blocksCount includes the page in pageBuffer, so decrease it now.
438 : */
439 168 : nodeBuffer->blocksCount--;
440 :
441 : /*
442 : * If there's more pages, fetch previous one.
443 : */
444 168 : prevblkno = nodeBuffer->pageBuffer->prev;
445 168 : if (prevblkno != InvalidBlockNumber)
446 : {
447 : /* There is a previous page. Fetch it. */
448 153 : Assert(nodeBuffer->blocksCount > 0);
449 153 : ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
450 :
451 : /*
452 : * Now that we've read the block in memory, we can release its
453 : * on-disk block for reuse.
454 : */
455 153 : gistBuffersReleaseBlock(gfbb, prevblkno);
456 : }
457 : else
458 : {
459 : /* No more pages. Free memory. */
460 15 : Assert(nodeBuffer->blocksCount == 0);
461 15 : pfree(nodeBuffer->pageBuffer);
462 15 : nodeBuffer->pageBuffer = NULL;
463 : }
464 : }
465 32037 : return true;
466 : }
467 :
468 : /*
469 : * Select a currently unused block for writing to.
470 : */
471 : static long
472 162 : gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
473 : {
474 : /*
475 : * If there are multiple free blocks, we select the one appearing last in
476 : * freeBlocks[]. If there are none, assign the next block at the end of
477 : * the file (causing the file to be extended).
478 : */
479 162 : if (gfbb->nFreeBlocks > 0)
480 75 : return gfbb->freeBlocks[--gfbb->nFreeBlocks];
481 : else
482 87 : return gfbb->nFileBlocks++;
483 : }
484 :
485 : /*
486 : * Return a block# to the freelist.
487 : */
488 : static void
489 162 : gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
490 : {
491 : int ndx;
492 :
493 : /* Enlarge freeBlocks array if full. */
494 162 : if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
495 : {
4231 heikki.linnakangas 496 UBC 0 : gfbb->freeBlocksLen *= 2;
497 0 : gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
498 0 : gfbb->freeBlocksLen *
499 : sizeof(long));
500 : }
501 :
502 : /* Add blocknum to array */
4231 heikki.linnakangas 503 CBC 162 : ndx = gfbb->nFreeBlocks++;
504 162 : gfbb->freeBlocks[ndx] = blocknum;
505 162 : }
506 :
507 : /*
508 : * Free buffering build data structure.
509 : */
510 : void
511 3 : gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
512 : {
513 : /* Close buffers file. */
514 3 : BufFileClose(gfbb->pfile);
515 :
516 : /* All other things will be freed on memory context release */
517 3 : }
518 :
519 : /*
520 : * Data structure representing information about node buffer for index tuples
521 : * relocation from splitted node buffer.
522 : */
523 : typedef struct
524 : {
525 : GISTENTRY entry[INDEX_MAX_KEYS];
526 : bool isnull[INDEX_MAX_KEYS];
527 : GISTPageSplitInfo *splitinfo;
528 : GISTNodeBuffer *nodeBuffer;
529 : } RelocationBufferInfo;
530 :
531 : /*
532 : * At page split, distribute tuples from the buffer of the split page to
533 : * new buffers for the created page halves. This also adjusts the downlinks
534 : * in 'splitinfo' to include the tuples in the buffers.
535 : */
536 : void
537 384 : gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
538 : Relation r, int level,
539 : Buffer buffer, List *splitinfo)
540 : {
541 : RelocationBufferInfo *relocationBuffersInfos;
542 : bool found;
543 : GISTNodeBuffer *nodeBuffer;
544 : BlockNumber blocknum;
545 : IndexTuple itup;
226 drowley 546 GNC 384 : int splitPagesCount = 0;
547 : GISTENTRY entry[INDEX_MAX_KEYS];
548 : bool isnull[INDEX_MAX_KEYS];
549 : GISTNodeBuffer oldBuf;
550 : ListCell *lc;
551 :
4231 heikki.linnakangas 552 ECB : /* If the splitted page doesn't have buffers, we have nothing to do. */
3966 heikki.linnakangas 553 CBC 384 : if (!LEVEL_HAS_BUFFERS(level, gfbb))
4231 heikki.linnakangas 554 GIC 378 : return;
555 :
556 : /*
557 : * Get the node buffer of the splitted page.
4231 heikki.linnakangas 558 ECB : */
4231 heikki.linnakangas 559 CBC 6 : blocknum = BufferGetBlockNumber(buffer);
4231 heikki.linnakangas 560 GIC 6 : nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
4231 heikki.linnakangas 561 ECB : HASH_FIND, &found);
4231 heikki.linnakangas 562 GIC 6 : if (!found)
563 : {
4055 heikki.linnakangas 564 EUB : /* The page has no buffer, so we have nothing to do. */
4055 heikki.linnakangas 565 UIC 0 : return;
566 : }
567 :
568 : /*
569 : * Make a copy of the old buffer, as we're going reuse it as the buffer
570 : * for the new left page, which is on the same block as the old page.
571 : * That's not true for the root page, but that's fine because we never
572 : * have a buffer on the root page anyway. The original algorithm as
573 : * described by Arge et al did, but it's of no use, as you might as well
574 : * read the tuples straight from the heap instead of the root buffer.
4231 heikki.linnakangas 575 ECB : */
4231 heikki.linnakangas 576 CBC 6 : Assert(blocknum != GIST_ROOT_BLKNO);
3978 577 6 : memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
3978 heikki.linnakangas 578 GIC 6 : oldBuf.isTemp = true;
579 :
4231 heikki.linnakangas 580 ECB : /* Reset the old buffer, used for the new left page from now on */
4231 heikki.linnakangas 581 CBC 6 : nodeBuffer->blocksCount = 0;
582 6 : nodeBuffer->pageBuffer = NULL;
4231 heikki.linnakangas 583 GIC 6 : nodeBuffer->pageBlocknum = InvalidBlockNumber;
584 :
585 : /*
586 : * Allocate memory for information about relocation buffers.
4231 heikki.linnakangas 587 ECB : */
4231 heikki.linnakangas 588 GIC 6 : splitPagesCount = list_length(splitinfo);
4231 heikki.linnakangas 589 ECB : relocationBuffersInfos =
4231 heikki.linnakangas 590 GIC 6 : (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
591 : splitPagesCount);
592 :
593 : /*
594 : * Fill relocation buffers information for node buffers of pages produced
595 : * by split.
4231 heikki.linnakangas 596 ECB : */
4231 heikki.linnakangas 597 CBC 18 : foreach(lc, splitinfo)
598 : {
599 12 : GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
600 : GISTNodeBuffer *newNodeBuffer;
226 drowley 601 GNC 12 : int i = foreach_current_index(lc);
602 :
4231 heikki.linnakangas 603 ECB : /* Decompress parent index tuple of node buffer page. */
4231 heikki.linnakangas 604 GIC 12 : gistDeCompressAtt(giststate, r,
4231 heikki.linnakangas 605 ECB : si->downlink, NULL, (OffsetNumber) 0,
4231 heikki.linnakangas 606 CBC 12 : relocationBuffersInfos[i].entry,
4231 heikki.linnakangas 607 GIC 12 : relocationBuffersInfos[i].isnull);
608 :
609 : /*
610 : * Create a node buffer for the page. The leftmost half is on the same
611 : * block as the old page before split, so for the leftmost half this
612 : * will return the original buffer. The tuples on the original buffer
613 : * were relinked to the temporary buffer, so the original one is now
614 : * empty.
4231 heikki.linnakangas 615 ECB : */
3966 heikki.linnakangas 616 GIC 12 : newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
4231 heikki.linnakangas 617 ECB :
4231 heikki.linnakangas 618 CBC 12 : relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
4231 heikki.linnakangas 619 GIC 12 : relocationBuffersInfos[i].splitinfo = si;
620 : }
621 :
622 : /*
623 : * Loop through all index tuples in the buffer of the page being split,
624 : * moving them to buffers for the new pages. We try to move each tuple to
625 : * the page that will result in the lowest penalty for the leading column
626 : * or, in the case of a tie, the lowest penalty for the earliest column
627 : * that is not tied.
3874 rhaas 628 ECB : *
629 : * The page searching logic is very similar to gistchoose().
630 : */
3978 heikki.linnakangas 631 GIC 14877 : while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
632 : {
633 : float best_penalty[INDEX_MAX_KEYS];
634 : int i,
635 : which;
4231 heikki.linnakangas 636 ECB : IndexTuple newtup;
637 : RelocationBufferInfo *targetBufferInfo;
638 :
4231 heikki.linnakangas 639 GIC 14871 : gistDeCompressAtt(giststate, r,
4231 heikki.linnakangas 640 ECB : itup, NULL, (OffsetNumber) 0, entry, isnull);
641 :
642 : /* default to using first page (shouldn't matter) */
3874 tgl 643 GIC 14871 : which = 0;
644 :
645 : /*
646 : * best_penalty[j] is the best penalty we have seen so far for column
3874 tgl 647 ECB : * j, or -1 when we haven't yet examined column j. Array entries to
648 : * the right of the first -1 are undefined.
649 : */
3874 tgl 650 GIC 14871 : best_penalty[0] = -1;
651 :
652 : /*
3874 tgl 653 ECB : * Loop over possible target pages, looking for one to move this tuple
654 : * to.
655 : */
3874 tgl 656 GIC 44607 : for (i = 0; i < splitPagesCount; i++)
657 : {
4231 heikki.linnakangas 658 29739 : RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
3874 tgl 659 ECB : bool zero_penalty;
660 : int j;
661 :
3874 tgl 662 CBC 29739 : zero_penalty = true;
663 :
664 : /* Loop over index attributes. */
909 tgl 665 GIC 55290 : for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
666 : {
4231 heikki.linnakangas 667 ECB : float usize;
668 :
3874 tgl 669 : /* Compute penalty for this column. */
4231 heikki.linnakangas 670 CBC 29739 : usize = gistpenalty(giststate, j,
4231 heikki.linnakangas 671 ECB : &splitPageInfo->entry[j],
4231 heikki.linnakangas 672 CBC 29739 : splitPageInfo->isnull[j],
4231 heikki.linnakangas 673 GIC 29739 : &entry[j], isnull[j]);
3874 tgl 674 CBC 29739 : if (usize > 0)
3874 tgl 675 GIC 29736 : zero_penalty = false;
676 :
677 29739 : if (best_penalty[j] < 0 || usize < best_penalty[j])
678 : {
679 : /*
680 : * New best penalty for column. Tentatively select this
681 : * page as the target, and record the best penalty. Then
682 : * reset the next column's penalty to "unknown" (and
683 : * indirectly, the same for all the ones to its right).
684 : * This will force us to adopt this page's penalty values
3874 tgl 685 ECB : * as the best for all the remaining columns during
686 : * subsequent loop iterations.
687 : */
4231 heikki.linnakangas 688 CBC 25551 : which = i;
3874 tgl 689 GBC 25551 : best_penalty[j] = usize;
690 :
909 tgl 691 CBC 25551 : if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
3874 tgl 692 UIC 0 : best_penalty[j + 1] = -1;
693 : }
3874 tgl 694 GIC 4188 : else if (best_penalty[j] == usize)
695 : {
696 : /*
697 : * The current page is exactly as good for this column as
698 : * the best page seen so far. The next iteration of this
699 : * loop will compare the next column.
700 : */
701 : }
702 : else
703 : {
704 : /*
705 : * The current page is worse for this column than the best
3874 tgl 706 ECB : * page seen so far. Skip the remaining columns and move
707 : * on to the next page, if any.
708 : */
2118 tgl 709 GIC 4188 : zero_penalty = false; /* so outer loop won't exit */
4231 heikki.linnakangas 710 4188 : break;
711 : }
712 : }
713 :
714 : /*
715 : * If we find a page with zero penalty for all columns, there's no
3874 tgl 716 ECB : * need to examine remaining pages; just break out of the loop and
717 : * return it.
718 : */
3874 tgl 719 GIC 29739 : if (zero_penalty)
720 3 : break;
4231 heikki.linnakangas 721 ECB : }
722 :
723 : /* OK, "which" is the page index to push the tuple to */
4231 heikki.linnakangas 724 CBC 14871 : targetBufferInfo = &relocationBuffersInfos[which];
725 :
726 : /* Push item to selected node buffer */
727 14871 : gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
728 :
4231 heikki.linnakangas 729 ECB : /* Adjust the downlink for this page, if needed. */
4231 heikki.linnakangas 730 GIC 14871 : newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
4231 heikki.linnakangas 731 ECB : itup, giststate);
4231 heikki.linnakangas 732 GIC 14871 : if (newtup)
4231 heikki.linnakangas 733 ECB : {
4231 heikki.linnakangas 734 CBC 14868 : gistDeCompressAtt(giststate, r,
735 : newtup, NULL, (OffsetNumber) 0,
736 14868 : targetBufferInfo->entry,
4231 heikki.linnakangas 737 GIC 14868 : targetBufferInfo->isnull);
738 :
739 14868 : targetBufferInfo->splitinfo->downlink = newtup;
4231 heikki.linnakangas 740 ECB : }
741 : }
742 :
4231 heikki.linnakangas 743 GIC 6 : pfree(relocationBuffersInfos);
744 : }
745 :
746 :
747 : /*
748 : * Wrappers around BufFile operations. The main difference is that these
749 : * wrappers report errors with ereport(), so that the callers don't need
750 : * to check the return code.
4231 heikki.linnakangas 751 ECB : */
752 :
753 : static void
4231 heikki.linnakangas 754 GBC 162 : ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
4231 heikki.linnakangas 755 ECB : {
4231 heikki.linnakangas 756 GIC 162 : if (BufFileSeekBlock(file, blknum) != 0)
1027 tmunro 757 LBC 0 : elog(ERROR, "could not seek to block %ld in temporary file", blknum);
83 peter 758 GNC 162 : BufFileReadExact(file, ptr, BLCKSZ);
4231 heikki.linnakangas 759 CBC 162 : }
760 :
761 : static void
100 peter 762 GNC 162 : WriteTempFileBlock(BufFile *file, long blknum, const void *ptr)
763 : {
4231 heikki.linnakangas 764 GIC 162 : if (BufFileSeekBlock(file, blknum) != 0)
1027 tmunro 765 UIC 0 : elog(ERROR, "could not seek to block %ld in temporary file", blknum);
1027 tmunro 766 GIC 162 : BufFileWrite(file, ptr, BLCKSZ);
4231 heikki.linnakangas 767 162 : }
|