Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * freepage.c
4 : : * Management of free memory pages.
5 : : *
6 : : * The intention of this code is to provide infrastructure for memory
7 : : * allocators written specifically for PostgreSQL. At least in the case
8 : : * of dynamic shared memory, we can't simply use malloc() or even
9 : : * relatively thin wrappers like palloc() which sit on top of it, because
10 : : * no allocator built into the operating system will deal with relative
11 : : * pointers. In the future, we may find other cases in which greater
12 : : * control over our own memory management seems desirable.
13 : : *
14 : : * A FreePageManager keeps track of which 4kB pages of memory are currently
15 : : * unused from the point of view of some higher-level memory allocator.
16 : : * Unlike a user-facing allocator such as palloc(), a FreePageManager can
17 : : * only allocate and free in units of whole pages, and freeing an
18 : : * allocation can only be done given knowledge of its length in pages.
19 : : *
20 : : * Since a free page manager has only a fixed amount of dedicated memory,
21 : : * and since there is no underlying allocator, it uses the free pages
22 : : * it is given to manage to store its bookkeeping data. It keeps multiple
23 : : * freelists of runs of pages, sorted by the size of the run; the head of
24 : : * each freelist is stored in the FreePageManager itself, and the first
25 : : * page of each run contains a relative pointer to the next run. See
26 : : * FreePageManagerGetInternal for more details on how the freelists are
27 : : * managed.
28 : : *
29 : : * To avoid memory fragmentation, it's important to consolidate adjacent
30 : : * spans of pages whenever possible; otherwise, large allocation requests
31 : : * might not be satisfied even when sufficient contiguous space is
32 : : * available. Therefore, in addition to the freelists, we maintain an
33 : : * in-memory btree of free page ranges ordered by page number. If a
34 : : * range being freed precedes or follows a range that is already free,
35 : : * the existing range is extended; if it exactly bridges the gap between
36 : : * free ranges, then the two existing ranges are consolidated with the
37 : : * newly-freed range to form one great big range of free pages.
38 : : *
39 : : * When there is only one range of free pages, the btree is trivial and
40 : : * is stored within the FreePageManager proper; otherwise, pages are
41 : : * allocated from the area under management as needed. Even in cases
42 : : * where memory fragmentation is very severe, only a tiny fraction of
43 : : * the pages under management are consumed by this btree.
44 : : *
45 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
46 : : * Portions Copyright (c) 1994, Regents of the University of California
47 : : *
48 : : * IDENTIFICATION
49 : : * src/backend/utils/mmgr/freepage.c
50 : : *
51 : : *-------------------------------------------------------------------------
52 : : */
53 : :
54 : : #include "postgres.h"
55 : : #include "lib/stringinfo.h"
56 : : #include "miscadmin.h"
57 : :
58 : : #include "utils/freepage.h"
59 : : #include "utils/relptr.h"
60 : :
61 : :
62 : : /* Magic numbers to identify various page types */
63 : : #define FREE_PAGE_SPAN_LEADER_MAGIC 0xea4020f0
64 : : #define FREE_PAGE_LEAF_MAGIC 0x98eae728
65 : : #define FREE_PAGE_INTERNAL_MAGIC 0x19aa32c9
66 : :
67 : : /* Doubly linked list of spans of free pages; stored in first page of span. */
68 : : struct FreePageSpanLeader
69 : : {
70 : : int magic; /* always FREE_PAGE_SPAN_LEADER_MAGIC */
71 : : Size npages; /* number of pages in span */
72 : : RelptrFreePageSpanLeader prev;
73 : : RelptrFreePageSpanLeader next;
74 : : };
75 : :
76 : : /* Common header for btree leaf and internal pages. */
77 : : typedef struct FreePageBtreeHeader
78 : : {
79 : : int magic; /* FREE_PAGE_LEAF_MAGIC or
80 : : * FREE_PAGE_INTERNAL_MAGIC */
81 : : Size nused; /* number of items used */
82 : : RelptrFreePageBtree parent; /* uplink */
83 : : } FreePageBtreeHeader;
84 : :
85 : : /* Internal key; points to next level of btree. */
86 : : typedef struct FreePageBtreeInternalKey
87 : : {
88 : : Size first_page; /* low bound for keys on child page */
89 : : RelptrFreePageBtree child; /* downlink */
90 : : } FreePageBtreeInternalKey;
91 : :
92 : : /* Leaf key; no payload data. */
93 : : typedef struct FreePageBtreeLeafKey
94 : : {
95 : : Size first_page; /* first page in span */
96 : : Size npages; /* number of pages in span */
97 : : } FreePageBtreeLeafKey;
98 : :
99 : : /* Work out how many keys will fit on a page. */
100 : : #define FPM_ITEMS_PER_INTERNAL_PAGE \
101 : : ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
102 : : sizeof(FreePageBtreeInternalKey))
103 : : #define FPM_ITEMS_PER_LEAF_PAGE \
104 : : ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
105 : : sizeof(FreePageBtreeLeafKey))
106 : :
107 : : /* A btree page of either sort */
108 : : struct FreePageBtree
109 : : {
110 : : FreePageBtreeHeader hdr;
111 : : union
112 : : {
113 : : FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
114 : : FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
115 : : } u;
116 : : };
117 : :
118 : : /* Results of a btree search */
119 : : typedef struct FreePageBtreeSearchResult
120 : : {
121 : : FreePageBtree *page;
122 : : Size index;
123 : : bool found;
124 : : unsigned split_pages;
125 : : } FreePageBtreeSearchResult;
126 : :
127 : : /* Helper functions */
128 : : static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
129 : : FreePageBtree *btp);
130 : : static Size FreePageBtreeCleanup(FreePageManager *fpm);
131 : : static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
132 : : FreePageBtree *btp);
133 : : static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
134 : : FreePageBtree *btp);
135 : : static Size FreePageBtreeFirstKey(FreePageBtree *btp);
136 : : static FreePageBtree *FreePageBtreeGetRecycled(FreePageManager *fpm);
137 : : static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp,
138 : : Size index, Size first_page, FreePageBtree *child);
139 : : static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
140 : : Size first_page, Size npages);
141 : : static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
142 : : static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
143 : : Size index);
144 : : static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
145 : : static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
146 : : FreePageBtreeSearchResult *result);
147 : : static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
148 : : static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
149 : : static FreePageBtree *FreePageBtreeSplitPage(FreePageManager *fpm,
150 : : FreePageBtree *btp);
151 : : static void FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp);
152 : : static void FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
153 : : FreePageBtree *parent, int level, StringInfo buf);
154 : : static void FreePageManagerDumpSpans(FreePageManager *fpm,
155 : : FreePageSpanLeader *span, Size expected_pages,
156 : : StringInfo buf);
157 : : static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
158 : : Size *first_page);
159 : : static Size FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
160 : : Size npages, bool soft);
161 : : static void FreePagePopSpanLeader(FreePageManager *fpm, Size pageno);
162 : : static void FreePagePushSpanLeader(FreePageManager *fpm, Size first_page,
163 : : Size npages);
164 : : static Size FreePageManagerLargestContiguous(FreePageManager *fpm);
165 : : static void FreePageManagerUpdateLargest(FreePageManager *fpm);
166 : :
167 : : #ifdef FPM_EXTRA_ASSERTS
168 : : static Size sum_free_pages(FreePageManager *fpm);
169 : : #endif
170 : :
171 : : /*
172 : : * Initialize a new, empty free page manager.
173 : : *
174 : : * 'fpm' should reference caller-provided memory large enough to contain a
175 : : * FreePageManager. We'll initialize it here.
176 : : *
177 : : * 'base' is the address to which all pointers are relative. When managing
178 : : * a dynamic shared memory segment, it should normally be the base of the
179 : : * segment. When managing backend-private memory, it can be either NULL or,
180 : : * if managing a single contiguous extent of memory, the start of that extent.
181 : : */
182 : : void
2690 rhaas@postgresql.org 183 :CBC 2293 : FreePageManagerInitialize(FreePageManager *fpm, char *base)
184 : : {
185 : : Size f;
186 : :
187 : 2293 : relptr_store(base, fpm->self, fpm);
188 : 2293 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
189 : 2293 : relptr_store(base, fpm->btree_recycle, (FreePageSpanLeader *) NULL);
190 : 2293 : fpm->btree_depth = 0;
191 : 2293 : fpm->btree_recycle_count = 0;
192 : 2293 : fpm->singleton_first_page = 0;
193 : 2293 : fpm->singleton_npages = 0;
194 : 2293 : fpm->contiguous_pages = 0;
195 : 2293 : fpm->contiguous_pages_dirty = true;
196 : : #ifdef FPM_EXTRA_ASSERTS
197 : : fpm->free_pages = 0;
198 : : #endif
199 : :
200 [ + + ]: 298090 : for (f = 0; f < FPM_NUM_FREELISTS; f++)
201 : 295797 : relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
202 : 2293 : }
203 : :
204 : : /*
205 : : * Allocate a run of pages of the given length from the free page manager.
206 : : * The return value indicates whether we were able to satisfy the request;
207 : : * if true, the first page of the allocation is stored in *first_page.
208 : : */
209 : : bool
210 : 11078 : FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
211 : : {
212 : : bool result;
213 : : Size contiguous_pages;
214 : :
215 : 11078 : result = FreePageManagerGetInternal(fpm, npages, first_page);
216 : :
217 : : /*
218 : : * It's a bit counterintuitive, but allocating pages can actually create
219 : : * opportunities for cleanup that create larger ranges. We might pull a
220 : : * key out of the btree that enables the item at the head of the btree
221 : : * recycle list to be inserted; and then if there are more items behind it
222 : : * one of those might cause two currently-separated ranges to merge,
223 : : * creating a single range of contiguous pages larger than any that
224 : : * existed previously. It might be worth trying to improve the cleanup
225 : : * algorithm to avoid such corner cases, but for now we just notice the
226 : : * condition and do the appropriate reporting.
227 : : */
228 : 11078 : contiguous_pages = FreePageBtreeCleanup(fpm);
229 [ + + ]: 11078 : if (fpm->contiguous_pages < contiguous_pages)
230 : 70 : fpm->contiguous_pages = contiguous_pages;
231 : :
232 : : /*
233 : : * FreePageManagerGetInternal may have set contiguous_pages_dirty.
234 : : * Recompute contiguous_pages if so.
235 : : */
236 : 11078 : FreePageManagerUpdateLargest(fpm);
237 : :
238 : : #ifdef FPM_EXTRA_ASSERTS
239 : : if (result)
240 : : {
241 : : Assert(fpm->free_pages >= npages);
242 : : fpm->free_pages -= npages;
243 : : }
244 : : Assert(fpm->free_pages == sum_free_pages(fpm));
245 : : Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
246 : : #endif
247 : 11078 : return result;
248 : : }
249 : :
250 : : #ifdef FPM_EXTRA_ASSERTS
251 : : static void
252 : : sum_free_pages_recurse(FreePageManager *fpm, FreePageBtree *btp, Size *sum)
253 : : {
254 : : char *base = fpm_segment_base(fpm);
255 : :
256 : : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ||
257 : : btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
258 : : ++*sum;
259 : : if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
260 : : {
261 : : Size index;
262 : :
263 : :
264 : : for (index = 0; index < btp->hdr.nused; ++index)
265 : : {
266 : : FreePageBtree *child;
267 : :
268 : : child = relptr_access(base, btp->u.internal_key[index].child);
269 : : sum_free_pages_recurse(fpm, child, sum);
270 : : }
271 : : }
272 : : }
273 : : static Size
274 : : sum_free_pages(FreePageManager *fpm)
275 : : {
276 : : FreePageSpanLeader *recycle;
277 : : char *base = fpm_segment_base(fpm);
278 : : Size sum = 0;
279 : : int list;
280 : :
281 : : /* Count the spans by scanning the freelists. */
282 : : for (list = 0; list < FPM_NUM_FREELISTS; ++list)
283 : : {
284 : :
285 : : if (!relptr_is_null(fpm->freelist[list]))
286 : : {
287 : : FreePageSpanLeader *candidate =
288 : : relptr_access(base, fpm->freelist[list]);
289 : :
290 : : do
291 : : {
292 : : sum += candidate->npages;
293 : : candidate = relptr_access(base, candidate->next);
294 : : } while (candidate != NULL);
295 : : }
296 : : }
297 : :
298 : : /* Count btree internal pages. */
299 : : if (fpm->btree_depth > 0)
300 : : {
301 : : FreePageBtree *root = relptr_access(base, fpm->btree_root);
302 : :
303 : : sum_free_pages_recurse(fpm, root, &sum);
304 : : }
305 : :
306 : : /* Count the recycle list. */
307 : : for (recycle = relptr_access(base, fpm->btree_recycle);
308 : : recycle != NULL;
309 : : recycle = relptr_access(base, recycle->next))
310 : : {
311 : : Assert(recycle->npages == 1);
312 : : ++sum;
313 : : }
314 : :
315 : : return sum;
316 : : }
317 : : #endif
318 : :
319 : : /*
320 : : * Compute the size of the largest run of pages that the user could
321 : : * successfully get.
322 : : */
323 : : static Size
324 : 12133 : FreePageManagerLargestContiguous(FreePageManager *fpm)
325 : : {
326 : : char *base;
327 : : Size largest;
328 : :
329 : 12133 : base = fpm_segment_base(fpm);
330 : 12133 : largest = 0;
331 [ + + ]: 12133 : if (!relptr_is_null(fpm->freelist[FPM_NUM_FREELISTS - 1]))
332 : : {
333 : : FreePageSpanLeader *candidate;
334 : :
335 [ + - ]: 6836 : candidate = relptr_access(base, fpm->freelist[FPM_NUM_FREELISTS - 1]);
336 : : do
337 : : {
338 [ + - ]: 6836 : if (candidate->npages > largest)
339 : 6836 : largest = candidate->npages;
340 [ - + ]: 6836 : candidate = relptr_access(base, candidate->next);
341 [ - + ]: 6836 : } while (candidate != NULL);
342 : : }
343 : : else
344 : : {
345 : 5297 : Size f = FPM_NUM_FREELISTS - 1;
346 : :
347 : : do
348 : : {
349 : 444429 : --f;
350 [ + + ]: 444429 : if (!relptr_is_null(fpm->freelist[f]))
351 : : {
352 : 5297 : largest = f + 1;
353 : 5297 : break;
354 : : }
355 [ + - ]: 439132 : } while (f > 0);
356 : : }
357 : :
358 : 12133 : return largest;
359 : : }
360 : :
361 : : /*
362 : : * Recompute the size of the largest run of pages that the user could
363 : : * successfully get, if it has been marked dirty.
364 : : */
365 : : static void
366 : 15368 : FreePageManagerUpdateLargest(FreePageManager *fpm)
367 : : {
368 [ + + ]: 15368 : if (!fpm->contiguous_pages_dirty)
369 : 3235 : return;
370 : :
371 : 12133 : fpm->contiguous_pages = FreePageManagerLargestContiguous(fpm);
372 : 12133 : fpm->contiguous_pages_dirty = false;
373 : : }
374 : :
375 : : /*
376 : : * Transfer a run of pages to the free page manager.
377 : : */
378 : : void
379 : 4290 : FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
380 : : {
381 : : Size contiguous_pages;
382 : :
383 [ - + ]: 4290 : Assert(npages > 0);
384 : :
385 : : /* Record the new pages. */
386 : : contiguous_pages =
387 : 4290 : FreePageManagerPutInternal(fpm, first_page, npages, false);
388 : :
389 : : /*
390 : : * If the new range we inserted into the page manager was contiguous with
391 : : * an existing range, it may have opened up cleanup opportunities.
392 : : */
393 [ + + ]: 4290 : if (contiguous_pages > npages)
394 : : {
395 : : Size cleanup_contiguous_pages;
396 : :
397 : 1584 : cleanup_contiguous_pages = FreePageBtreeCleanup(fpm);
398 [ + + ]: 1584 : if (cleanup_contiguous_pages > contiguous_pages)
399 : 42 : contiguous_pages = cleanup_contiguous_pages;
400 : : }
401 : :
402 : : /* See if we now have a new largest chunk. */
403 [ + + ]: 4290 : if (fpm->contiguous_pages < contiguous_pages)
404 : 2676 : fpm->contiguous_pages = contiguous_pages;
405 : :
406 : : /*
407 : : * The earlier call to FreePageManagerPutInternal may have set
408 : : * contiguous_pages_dirty if it needed to allocate internal pages, so
409 : : * recompute contiguous_pages if necessary.
410 : : */
411 : 4290 : FreePageManagerUpdateLargest(fpm);
412 : :
413 : : #ifdef FPM_EXTRA_ASSERTS
414 : : fpm->free_pages += npages;
415 : : Assert(fpm->free_pages == sum_free_pages(fpm));
416 : : Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
417 : : #endif
418 : 4290 : }
419 : :
420 : : /*
421 : : * Produce a debugging dump of the state of a free page manager.
422 : : */
423 : : char *
2690 rhaas@postgresql.org 424 :UBC 0 : FreePageManagerDump(FreePageManager *fpm)
425 : : {
426 : 0 : char *base = fpm_segment_base(fpm);
427 : : StringInfoData buf;
428 : : FreePageSpanLeader *recycle;
429 : 0 : bool dumped_any_freelist = false;
430 : : Size f;
431 : :
432 : : /* Initialize output buffer. */
433 : 0 : initStringInfo(&buf);
434 : :
435 : : /* Dump general stuff. */
436 : 0 : appendStringInfo(&buf, "metadata: self %zu max contiguous pages = %zu\n",
657 tmunro@postgresql.or 437 : 0 : relptr_offset(fpm->self), fpm->contiguous_pages);
438 : :
439 : : /* Dump btree. */
2690 rhaas@postgresql.org 440 [ # # ]: 0 : if (fpm->btree_depth > 0)
441 : : {
442 : : FreePageBtree *root;
443 : :
444 : 0 : appendStringInfo(&buf, "btree depth %u:\n", fpm->btree_depth);
445 [ # # ]: 0 : root = relptr_access(base, fpm->btree_root);
446 : 0 : FreePageManagerDumpBtree(fpm, root, NULL, 0, &buf);
447 : : }
448 [ # # ]: 0 : else if (fpm->singleton_npages > 0)
449 : : {
450 : 0 : appendStringInfo(&buf, "singleton: %zu(%zu)\n",
451 : : fpm->singleton_first_page, fpm->singleton_npages);
452 : : }
453 : :
454 : : /* Dump btree recycle list. */
455 [ # # ]: 0 : recycle = relptr_access(base, fpm->btree_recycle);
456 [ # # ]: 0 : if (recycle != NULL)
457 : : {
2434 peter_e@gmx.net 458 : 0 : appendStringInfoString(&buf, "btree recycle:");
2690 rhaas@postgresql.org 459 : 0 : FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
460 : : }
461 : :
462 : : /* Dump free lists. */
463 [ # # ]: 0 : for (f = 0; f < FPM_NUM_FREELISTS; ++f)
464 : : {
465 : : FreePageSpanLeader *span;
466 : :
467 [ # # ]: 0 : if (relptr_is_null(fpm->freelist[f]))
468 : 0 : continue;
469 [ # # ]: 0 : if (!dumped_any_freelist)
470 : : {
2434 peter_e@gmx.net 471 : 0 : appendStringInfoString(&buf, "freelists:\n");
2690 rhaas@postgresql.org 472 : 0 : dumped_any_freelist = true;
473 : : }
474 : 0 : appendStringInfo(&buf, " %zu:", f + 1);
475 [ # # ]: 0 : span = relptr_access(base, fpm->freelist[f]);
476 : 0 : FreePageManagerDumpSpans(fpm, span, f + 1, &buf);
477 : : }
478 : :
479 : : /* And return result to caller. */
480 : 0 : return buf.data;
481 : : }
482 : :
483 : :
484 : : /*
485 : : * The first_page value stored at index zero in any non-root page must match
486 : : * the first_page value stored in its parent at the index which points to that
487 : : * page. So when the value stored at index zero in a btree page changes, we've
488 : : * got to walk up the tree adjusting ancestor keys until we reach an ancestor
489 : : * where that key isn't index zero. This function should be called after
490 : : * updating the first key on the target page; it will propagate the change
491 : : * upward as far as needed.
492 : : *
493 : : * We assume here that the first key on the page has not changed enough to
494 : : * require changes in the ordering of keys on its ancestor pages. Thus,
495 : : * if we search the parent page for the first key greater than or equal to
496 : : * the first key on the current page, the downlink to this page will be either
497 : : * the exact index returned by the search (if the first key decreased)
498 : : * or one less (if the first key increased).
499 : : */
500 : : static void
2690 rhaas@postgresql.org 501 :CBC 1347 : FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
502 : : {
503 : 1347 : char *base = fpm_segment_base(fpm);
504 : : Size first_page;
505 : : FreePageBtree *parent;
506 : : FreePageBtree *child;
507 : :
508 : : /* This might be either a leaf or an internal page. */
509 [ - + ]: 1347 : Assert(btp->hdr.nused > 0);
510 [ + - ]: 1347 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
511 : : {
512 [ - + ]: 1347 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
513 : 1347 : first_page = btp->u.leaf_key[0].first_page;
514 : : }
515 : : else
516 : : {
2690 rhaas@postgresql.org 517 [ # # ]:UBC 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
518 [ # # ]: 0 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
519 : 0 : first_page = btp->u.internal_key[0].first_page;
520 : : }
2690 rhaas@postgresql.org 521 :CBC 1347 : child = btp;
522 : :
523 : : /* Loop until we find an ancestor that does not require adjustment. */
524 : : for (;;)
2690 rhaas@postgresql.org 525 :UBC 0 : {
526 : : Size s;
527 : :
2690 rhaas@postgresql.org 528 [ - + ]:CBC 1347 : parent = relptr_access(base, child->hdr.parent);
529 [ + - ]: 1347 : if (parent == NULL)
530 : 1347 : break;
2690 rhaas@postgresql.org 531 :UBC 0 : s = FreePageBtreeSearchInternal(parent, first_page);
532 : :
533 : : /* Key is either at index s or index s-1; figure out which. */
534 [ # # ]: 0 : if (s >= parent->hdr.nused)
535 : : {
536 [ # # ]: 0 : Assert(s == parent->hdr.nused);
537 : 0 : --s;
538 : : }
539 : : else
540 : : {
541 : : FreePageBtree *check;
542 : :
543 [ # # ]: 0 : check = relptr_access(base, parent->u.internal_key[s].child);
544 [ # # ]: 0 : if (check != child)
545 : : {
546 [ # # ]: 0 : Assert(s > 0);
547 : 0 : --s;
548 : : }
549 : : }
550 : :
551 : : #ifdef USE_ASSERT_CHECKING
552 : : /* Debugging double-check. */
553 : : {
554 : : FreePageBtree *check;
555 : :
556 [ # # ]: 0 : check = relptr_access(base, parent->u.internal_key[s].child);
557 [ # # ]: 0 : Assert(s < parent->hdr.nused);
558 [ # # ]: 0 : Assert(child == check);
559 : : }
560 : : #endif
561 : :
562 : : /* Update the parent key. */
563 : 0 : parent->u.internal_key[s].first_page = first_page;
564 : :
565 : : /*
566 : : * If this is the first key in the parent, go up another level; else
567 : : * done.
568 : : */
569 [ # # ]: 0 : if (s > 0)
570 : 0 : break;
571 : 0 : child = parent;
572 : : }
2690 rhaas@postgresql.org 573 :CBC 1347 : }
574 : :
575 : : /*
576 : : * Attempt to reclaim space from the free-page btree. The return value is
577 : : * the largest range of contiguous pages created by the cleanup operation.
578 : : */
579 : : static Size
580 : 12662 : FreePageBtreeCleanup(FreePageManager *fpm)
581 : : {
582 : 12662 : char *base = fpm_segment_base(fpm);
583 : 12662 : Size max_contiguous_pages = 0;
584 : :
585 : : /* Attempt to shrink the depth of the btree. */
586 [ + + ]: 12818 : while (!relptr_is_null(fpm->btree_root))
587 : : {
588 [ + - ]: 2354 : FreePageBtree *root = relptr_access(base, fpm->btree_root);
589 : :
590 : : /* If the root contains only one key, reduce depth by one. */
591 [ + + ]: 2354 : if (root->hdr.nused == 1)
592 : : {
593 : : /* Shrink depth of tree by one. */
594 [ - + ]: 156 : Assert(fpm->btree_depth > 0);
595 : 156 : --fpm->btree_depth;
596 [ + - ]: 156 : if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
597 : : {
598 : : /* If root is a leaf, convert only entry to singleton range. */
599 : 156 : relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
600 : 156 : fpm->singleton_first_page = root->u.leaf_key[0].first_page;
601 : 156 : fpm->singleton_npages = root->u.leaf_key[0].npages;
602 : : }
603 : : else
604 : : {
605 : : FreePageBtree *newroot;
606 : :
607 : : /* If root is an internal page, make only child the root. */
2690 rhaas@postgresql.org 608 [ # # ]:UBC 0 : Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
609 : 0 : relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
610 [ # # ]: 0 : newroot = relptr_access(base, fpm->btree_root);
611 : 0 : relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
612 : : }
2690 rhaas@postgresql.org 613 :CBC 156 : FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
614 : : }
615 [ + + ]: 2198 : else if (root->hdr.nused == 2 &&
616 [ + - ]: 1151 : root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
617 : : {
618 : : Size end_of_first;
619 : : Size start_of_second;
620 : :
621 : 1151 : end_of_first = root->u.leaf_key[0].first_page +
622 : 1151 : root->u.leaf_key[0].npages;
623 : 1151 : start_of_second = root->u.leaf_key[1].first_page;
624 : :
625 [ + + ]: 1151 : if (end_of_first + 1 == start_of_second)
626 : : {
627 : 42 : Size root_page = fpm_pointer_to_page(base, root);
628 : :
629 [ + - ]: 42 : if (end_of_first == root_page)
630 : : {
631 : 42 : FreePagePopSpanLeader(fpm, root->u.leaf_key[0].first_page);
632 : 42 : FreePagePopSpanLeader(fpm, root->u.leaf_key[1].first_page);
633 : 42 : fpm->singleton_first_page = root->u.leaf_key[0].first_page;
634 : 42 : fpm->singleton_npages = root->u.leaf_key[0].npages +
635 : 42 : root->u.leaf_key[1].npages + 1;
636 : 42 : fpm->btree_depth = 0;
637 : 42 : relptr_store(base, fpm->btree_root,
638 : : (FreePageBtree *) NULL);
639 : 42 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
640 : : fpm->singleton_npages);
641 [ - + ]: 42 : Assert(max_contiguous_pages == 0);
642 : 42 : max_contiguous_pages = fpm->singleton_npages;
643 : : }
644 : : }
645 : :
646 : : /* Whether it worked or not, it's time to stop. */
647 : 1151 : break;
648 : : }
649 : : else
650 : : {
651 : : /* Nothing more to do. Stop. */
652 : : break;
653 : : }
654 : : }
655 : :
656 : : /*
657 : : * Attempt to free recycled btree pages. We skip this if releasing the
658 : : * recycled page would require a btree page split, because the page we're
659 : : * trying to recycle would be consumed by the split, which would be
660 : : * counterproductive.
661 : : *
662 : : * We also currently only ever attempt to recycle the first page on the
663 : : * list; that could be made more aggressive, but it's not clear that the
664 : : * complexity would be worthwhile.
665 : : */
666 [ + + ]: 12732 : while (fpm->btree_recycle_count > 0)
667 : : {
668 : : FreePageBtree *btp;
669 : : Size first_page;
670 : : Size contiguous_pages;
671 : :
672 : 298 : btp = FreePageBtreeGetRecycled(fpm);
673 : 298 : first_page = fpm_pointer_to_page(base, btp);
674 : 298 : contiguous_pages = FreePageManagerPutInternal(fpm, first_page, 1, true);
675 [ + + ]: 298 : if (contiguous_pages == 0)
676 : : {
677 : 228 : FreePageBtreeRecycle(fpm, first_page);
678 : 228 : break;
679 : : }
680 : : else
681 : : {
682 [ + - ]: 70 : if (contiguous_pages > max_contiguous_pages)
683 : 70 : max_contiguous_pages = contiguous_pages;
684 : : }
685 : : }
686 : :
687 : 12662 : return max_contiguous_pages;
688 : : }
689 : :
690 : : /*
691 : : * Consider consolidating the given page with its left or right sibling,
692 : : * if it's fairly empty.
693 : : */
694 : : static void
695 : 629 : FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
696 : : {
697 : 629 : char *base = fpm_segment_base(fpm);
698 : : FreePageBtree *np;
699 : : Size max;
700 : :
701 : : /*
702 : : * We only try to consolidate pages that are less than a third full. We
703 : : * could be more aggressive about this, but that might risk performing
704 : : * consolidation only to end up splitting again shortly thereafter. Since
705 : : * the btree should be very small compared to the space under management,
706 : : * our goal isn't so much to ensure that it always occupies the absolutely
707 : : * smallest possible number of pages as to reclaim pages before things get
708 : : * too egregiously out of hand.
709 : : */
710 [ + - ]: 629 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
711 : 629 : max = FPM_ITEMS_PER_LEAF_PAGE;
712 : : else
713 : : {
2690 rhaas@postgresql.org 714 [ # # ]:UBC 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
715 : 0 : max = FPM_ITEMS_PER_INTERNAL_PAGE;
716 : : }
2690 rhaas@postgresql.org 717 [ - + ]:CBC 629 : if (btp->hdr.nused >= max / 3)
2690 rhaas@postgresql.org 718 :UBC 0 : return;
719 : :
720 : : /*
721 : : * If we can fit our right sibling's keys onto this page, consolidate.
722 : : */
2690 rhaas@postgresql.org 723 :CBC 629 : np = FreePageBtreeFindRightSibling(base, btp);
724 [ - + - - ]: 629 : if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
725 : : {
2690 rhaas@postgresql.org 726 [ # # ]:UBC 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
727 : : {
728 : 0 : memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
729 : 0 : sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
730 : 0 : btp->hdr.nused += np->hdr.nused;
731 : : }
732 : : else
733 : : {
734 : 0 : memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
735 : 0 : sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
736 : 0 : btp->hdr.nused += np->hdr.nused;
737 : 0 : FreePageBtreeUpdateParentPointers(base, btp);
738 : : }
739 : 0 : FreePageBtreeRemovePage(fpm, np);
740 : 0 : return;
741 : : }
742 : :
743 : : /*
744 : : * If we can fit our keys onto our left sibling's page, consolidate. In
745 : : * this case, we move our keys onto the other page rather than vice versa,
746 : : * to avoid having to adjust ancestor keys.
747 : : */
2690 rhaas@postgresql.org 748 :CBC 629 : np = FreePageBtreeFindLeftSibling(base, btp);
749 [ - + - - ]: 629 : if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
750 : : {
2690 rhaas@postgresql.org 751 [ # # ]:UBC 0 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
752 : : {
753 : 0 : memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
754 : 0 : sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
755 : 0 : np->hdr.nused += btp->hdr.nused;
756 : : }
757 : : else
758 : : {
759 : 0 : memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
760 : 0 : sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
761 : 0 : np->hdr.nused += btp->hdr.nused;
762 : 0 : FreePageBtreeUpdateParentPointers(base, np);
763 : : }
764 : 0 : FreePageBtreeRemovePage(fpm, btp);
765 : 0 : return;
766 : : }
767 : : }
768 : :
769 : : /*
770 : : * Find the passed page's left sibling; that is, the page at the same level
771 : : * of the tree whose keyspace immediately precedes ours.
772 : : */
773 : : static FreePageBtree *
2690 rhaas@postgresql.org 774 :CBC 629 : FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
775 : : {
776 : 629 : FreePageBtree *p = btp;
777 : 629 : int levels = 0;
778 : :
779 : : /* Move up until we can move left. */
780 : : for (;;)
2690 rhaas@postgresql.org 781 :UBC 0 : {
782 : : Size first_page;
783 : : Size index;
784 : :
2690 rhaas@postgresql.org 785 :CBC 629 : first_page = FreePageBtreeFirstKey(p);
786 [ - + ]: 629 : p = relptr_access(base, p->hdr.parent);
787 : :
788 [ + - ]: 629 : if (p == NULL)
789 : 629 : return NULL; /* we were passed the rightmost page */
790 : :
2690 rhaas@postgresql.org 791 :UBC 0 : index = FreePageBtreeSearchInternal(p, first_page);
792 [ # # ]: 0 : if (index > 0)
793 : : {
794 [ # # ]: 0 : Assert(p->u.internal_key[index].first_page == first_page);
795 [ # # ]: 0 : p = relptr_access(base, p->u.internal_key[index - 1].child);
796 : 0 : break;
797 : : }
798 [ # # ]: 0 : Assert(index == 0);
799 : 0 : ++levels;
800 : : }
801 : :
802 : : /* Descend left. */
803 [ # # ]: 0 : while (levels > 0)
804 : : {
805 [ # # ]: 0 : Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
806 [ # # ]: 0 : p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
807 : 0 : --levels;
808 : : }
809 [ # # ]: 0 : Assert(p->hdr.magic == btp->hdr.magic);
810 : :
811 : 0 : return p;
812 : : }
813 : :
/*
 * Find the passed page's right sibling; that is, the page at the same level
 * of the tree whose keyspace immediately follows ours.
 *
 * Returns NULL if the passed page is the rightmost page at its level.
 */
static FreePageBtree *
FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
{
	FreePageBtree *p = btp;
	int			levels = 0;

	/* Move up until we can move right. */
	for (;;)
	{
		Size		first_page;
		Size		index;

		first_page = FreePageBtreeFirstKey(p);
		p = relptr_access(base, p->hdr.parent);

		if (p == NULL)
			return NULL;		/* we were passed the rightmost page */

		index = FreePageBtreeSearchInternal(p, first_page);
		if (index < p->hdr.nused - 1)
		{
			/* Found an ancestor with a key to our right; step to it. */
			Assert(p->u.internal_key[index].first_page == first_page);
			p = relptr_access(base, p->u.internal_key[index + 1].child);
			break;
		}
		Assert(index == p->hdr.nused - 1);
		++levels;
	}

	/*
	 * Descend left: follow the leftmost child pointer back down as many
	 * levels as we ascended, landing on the page just right of where we
	 * started.
	 */
	while (levels > 0)
	{
		Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
		p = relptr_access(base, p->u.internal_key[0].child);
		--levels;
	}
	Assert(p->hdr.magic == btp->hdr.magic);

	return p;
}
858 : :
859 : : /*
860 : : * Get the first key on a btree page.
861 : : */
862 : : static Size
2690 rhaas@postgresql.org 863 :CBC 1263 : FreePageBtreeFirstKey(FreePageBtree *btp)
864 : : {
865 [ - + ]: 1263 : Assert(btp->hdr.nused > 0);
866 : :
867 [ + - ]: 1263 : if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
868 : 1263 : return btp->u.leaf_key[0].first_page;
869 : : else
870 : : {
2690 rhaas@postgresql.org 871 [ # # ]:UBC 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
872 : 0 : return btp->u.internal_key[0].first_page;
873 : : }
874 : : }
875 : :
876 : : /*
877 : : * Get a page from the btree recycle list for use as a btree page.
878 : : */
879 : : static FreePageBtree *
2690 rhaas@postgresql.org 880 :CBC 384 : FreePageBtreeGetRecycled(FreePageManager *fpm)
881 : : {
882 : 384 : char *base = fpm_segment_base(fpm);
883 [ + - ]: 384 : FreePageSpanLeader *victim = relptr_access(base, fpm->btree_recycle);
884 : : FreePageSpanLeader *newhead;
885 : :
886 [ - + ]: 384 : Assert(victim != NULL);
887 [ - + ]: 384 : newhead = relptr_access(base, victim->next);
888 [ - + ]: 384 : if (newhead != NULL)
2690 rhaas@postgresql.org 889 :UBC 0 : relptr_copy(newhead->prev, victim->prev);
2690 rhaas@postgresql.org 890 :CBC 384 : relptr_store(base, fpm->btree_recycle, newhead);
891 [ - + ]: 384 : Assert(fpm_pointer_is_page_aligned(base, victim));
892 : 384 : fpm->btree_recycle_count--;
893 : 384 : return (FreePageBtree *) victim;
894 : : }
895 : :
896 : : /*
897 : : * Insert an item into an internal page.
898 : : */
899 : : static void
2690 rhaas@postgresql.org 900 :UBC 0 : FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index,
901 : : Size first_page, FreePageBtree *child)
902 : : {
903 [ # # ]: 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
904 [ # # ]: 0 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
905 [ # # ]: 0 : Assert(index <= btp->hdr.nused);
906 : 0 : memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index],
907 : 0 : sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
908 : 0 : btp->u.internal_key[index].first_page = first_page;
909 : 0 : relptr_store(base, btp->u.internal_key[index].child, child);
910 : 0 : ++btp->hdr.nused;
911 : 0 : }
912 : :
913 : : /*
914 : : * Insert an item into a leaf page.
915 : : */
916 : : static void
2690 rhaas@postgresql.org 917 :CBC 729 : FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page,
918 : : Size npages)
919 : : {
920 [ - + ]: 729 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
921 [ - + ]: 729 : Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
922 [ - + ]: 729 : Assert(index <= btp->hdr.nused);
923 : 729 : memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index],
924 : 729 : sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
925 : 729 : btp->u.leaf_key[index].first_page = first_page;
926 : 729 : btp->u.leaf_key[index].npages = npages;
927 : 729 : ++btp->hdr.nused;
928 : 729 : }
929 : :
930 : : /*
931 : : * Put a page on the btree recycle list.
932 : : */
933 : : static void
934 : 384 : FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
935 : : {
936 : 384 : char *base = fpm_segment_base(fpm);
937 [ - + ]: 384 : FreePageSpanLeader *head = relptr_access(base, fpm->btree_recycle);
938 : : FreePageSpanLeader *span;
939 : :
940 : 384 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
941 : 384 : span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
942 : 384 : span->npages = 1;
943 : 384 : relptr_store(base, span->next, head);
944 : 384 : relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
945 [ - + ]: 384 : if (head != NULL)
2690 rhaas@postgresql.org 946 :UBC 0 : relptr_store(base, head->prev, span);
2690 rhaas@postgresql.org 947 :CBC 384 : relptr_store(base, fpm->btree_recycle, span);
948 : 384 : fpm->btree_recycle_count++;
949 : 384 : }
950 : :
/*
 * Remove an item from the btree at the given position on the given page.
 *
 * 'btp' must be a leaf page; 'index' is the slot to delete.  May cascade
 * into removal of the entire page (and its ancestors) when the last item
 * is deleted.
 */
static void
FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
{
	Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
	Assert(index < btp->hdr.nused);

	/* When last item is removed, extirpate entire page from btree. */
	if (btp->hdr.nused == 1)
	{
		FreePageBtreeRemovePage(fpm, btp);
		return;
	}

	/* Physically remove the key from the page. */
	--btp->hdr.nused;
	if (index < btp->hdr.nused)
		memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
				sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));

	/*
	 * If we just removed the first key, adjust ancestor keys.  This must
	 * happen before consolidation, which can move keys between pages.
	 */
	if (index == 0)
		FreePageBtreeAdjustAncestorKeys(fpm, btp);

	/* Consider whether to consolidate this page with a sibling. */
	FreePageBtreeConsolidate(fpm, btp);
}
980 : :
/*
 * Remove a page from the btree.  Caller is responsible for having relocated
 * any keys from this page that are still wanted.  The page is placed on the
 * recycled list.
 *
 * Removal cascades upward: any ancestor left with a single item is removed
 * as well, possibly emptying the entire tree.
 */
static void
FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageBtree *parent;
	Size		index;
	Size		first_page;

	for (;;)
	{
		/* Find parent page. */
		parent = relptr_access(base, btp->hdr.parent);
		if (parent == NULL)
		{
			/* We are removing the root page. */
			relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
			fpm->btree_depth = 0;
			/* An empty tree implies no singleton span is outstanding. */
			Assert(fpm->singleton_first_page == 0);
			Assert(fpm->singleton_npages == 0);
			return;
		}

		/*
		 * If the parent contains only one item, we need to remove it as well.
		 */
		if (parent->hdr.nused > 1)
			break;
		FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
		btp = parent;
	}

	/* Find and remove the downlink. */
	first_page = FreePageBtreeFirstKey(btp);
	if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
	{
		index = FreePageBtreeSearchLeaf(parent, first_page);
		Assert(index < parent->hdr.nused);
		if (index < parent->hdr.nused - 1)
			memmove(&parent->u.leaf_key[index],
					&parent->u.leaf_key[index + 1],
					sizeof(FreePageBtreeLeafKey)
					* (parent->hdr.nused - index - 1));
	}
	else
	{
		index = FreePageBtreeSearchInternal(parent, first_page);
		Assert(index < parent->hdr.nused);
		if (index < parent->hdr.nused - 1)
			memmove(&parent->u.internal_key[index],
					&parent->u.internal_key[index + 1],
					sizeof(FreePageBtreeInternalKey)
					* (parent->hdr.nused - index - 1));
	}
	parent->hdr.nused--;
	/* The loop above guarantees the parent had at least two items. */
	Assert(parent->hdr.nused > 0);

	/* Recycle the page. */
	FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));

	/* Adjust ancestor keys if needed. */
	if (index == 0)
		FreePageBtreeAdjustAncestorKeys(fpm, parent);

	/* Consider whether to consolidate the parent with a sibling. */
	FreePageBtreeConsolidate(fpm, parent);
}
1052 : :
1053 : : /*
1054 : : * Search the btree for an entry for the given first page and initialize
1055 : : * *result with the results of the search. result->page and result->index
1056 : : * indicate either the position of an exact match or the position at which
1057 : : * the new key should be inserted. result->found is true for an exact match,
1058 : : * otherwise false. result->split_pages will contain the number of additional
1059 : : * btree pages that will be needed when performing a split to insert a key.
1060 : : * Except as described above, the contents of fields in the result object are
1061 : : * undefined on return.
1062 : : */
1063 : : static void
2690 rhaas@postgresql.org 1064 :CBC 3083 : FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
1065 : : FreePageBtreeSearchResult *result)
1066 : : {
1067 : 3083 : char *base = fpm_segment_base(fpm);
1068 [ + - ]: 3083 : FreePageBtree *btp = relptr_access(base, fpm->btree_root);
1069 : : Size index;
1070 : :
1071 : 3083 : result->split_pages = 1;
1072 : :
1073 : : /* If the btree is empty, there's nothing to find. */
1074 [ - + ]: 3083 : if (btp == NULL)
1075 : : {
2690 rhaas@postgresql.org 1076 :UBC 0 : result->page = NULL;
1077 : 0 : result->found = false;
1078 : 0 : return;
1079 : : }
1080 : :
1081 : : /* Descend until we hit a leaf. */
2690 rhaas@postgresql.org 1082 [ - + ]:CBC 3083 : while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
1083 : : {
1084 : : FreePageBtree *child;
1085 : : bool found_exact;
1086 : :
2690 rhaas@postgresql.org 1087 :UBC 0 : index = FreePageBtreeSearchInternal(btp, first_page);
1088 [ # # ]: 0 : found_exact = index < btp->hdr.nused &&
1089 [ # # ]: 0 : btp->u.internal_key[index].first_page == first_page;
1090 : :
1091 : : /*
1092 : : * If we found an exact match we descend directly. Otherwise, we
1093 : : * descend into the child to the left if possible so that we can find
1094 : : * the insertion point at that child's high end.
1095 : : */
1096 [ # # # # ]: 0 : if (!found_exact && index > 0)
1097 : 0 : --index;
1098 : :
1099 : : /* Track required split depth for leaf insert. */
1100 [ # # ]: 0 : if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE)
1101 : : {
1102 [ # # ]: 0 : Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
1103 : 0 : result->split_pages++;
1104 : : }
1105 : : else
1106 : 0 : result->split_pages = 0;
1107 : :
1108 : : /* Descend to appropriate child page. */
1109 [ # # ]: 0 : Assert(index < btp->hdr.nused);
1110 [ # # ]: 0 : child = relptr_access(base, btp->u.internal_key[index].child);
1111 [ # # # # ]: 0 : Assert(relptr_access(base, child->hdr.parent) == btp);
1112 : 0 : btp = child;
1113 : : }
1114 : :
1115 : : /* Track required split depth for leaf insert. */
2690 rhaas@postgresql.org 1116 [ - + ]:CBC 3083 : if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE)
1117 : : {
2690 rhaas@postgresql.org 1118 [ # # ]:UBC 0 : Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
1119 : 0 : result->split_pages++;
1120 : : }
1121 : : else
2690 rhaas@postgresql.org 1122 :CBC 3083 : result->split_pages = 0;
1123 : :
1124 : : /* Search leaf page. */
1125 : 3083 : index = FreePageBtreeSearchLeaf(btp, first_page);
1126 : :
1127 : : /* Assemble results. */
1128 : 3083 : result->page = btp;
1129 : 3083 : result->index = index;
1130 [ + + ]: 6161 : result->found = index < btp->hdr.nused &&
1131 [ + + ]: 3078 : first_page == btp->u.leaf_key[index].first_page;
1132 : : }
1133 : :
1134 : : /*
1135 : : * Search an internal page for the first key greater than or equal to a given
1136 : : * page number. Returns the index of that key, or one greater than the number
1137 : : * of keys on the page if none.
1138 : : */
1139 : : static Size
2690 rhaas@postgresql.org 1140 :UBC 0 : FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
1141 : : {
1142 : 0 : Size low = 0;
1143 : 0 : Size high = btp->hdr.nused;
1144 : :
1145 [ # # ]: 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1146 [ # # # # ]: 0 : Assert(high > 0 && high <= FPM_ITEMS_PER_INTERNAL_PAGE);
1147 : :
1148 [ # # ]: 0 : while (low < high)
1149 : : {
1150 : 0 : Size mid = (low + high) / 2;
1151 : 0 : Size val = btp->u.internal_key[mid].first_page;
1152 : :
1153 [ # # ]: 0 : if (first_page == val)
1154 : 0 : return mid;
1155 [ # # ]: 0 : else if (first_page < val)
1156 : 0 : high = mid;
1157 : : else
1158 : 0 : low = mid + 1;
1159 : : }
1160 : :
1161 : 0 : return low;
1162 : : }
1163 : :
1164 : : /*
1165 : : * Search a leaf page for the first key greater than or equal to a given
1166 : : * page number. Returns the index of that key, or one greater than the number
1167 : : * of keys on the page if none.
1168 : : */
1169 : : static Size
2690 rhaas@postgresql.org 1170 :CBC 3083 : FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
1171 : : {
1172 : 3083 : Size low = 0;
1173 : 3083 : Size high = btp->hdr.nused;
1174 : :
1175 [ - + ]: 3083 : Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
1176 [ + - - + ]: 3083 : Assert(high > 0 && high <= FPM_ITEMS_PER_LEAF_PAGE);
1177 : :
1178 [ + + ]: 7445 : while (low < high)
1179 : : {
1180 : 5662 : Size mid = (low + high) / 2;
1181 : 5662 : Size val = btp->u.leaf_key[mid].first_page;
1182 : :
1183 [ + + ]: 5662 : if (first_page == val)
1184 : 1300 : return mid;
1185 [ + + ]: 4362 : else if (first_page < val)
1186 : 3229 : high = mid;
1187 : : else
1188 : 1133 : low = mid + 1;
1189 : : }
1190 : :
1191 : 1783 : return low;
1192 : : }
1193 : :
/*
 * Allocate a new btree page and move half the keys from the provided page
 * to the new page.  Caller is responsible for making sure that there's a
 * page available from fpm->btree_recycle.  Returns a pointer to the new page,
 * to which caller must add a downlink.
 */
static FreePageBtree *
FreePageBtreeSplitPage(FreePageManager *fpm, FreePageBtree *btp)
{
	FreePageBtree *newsibling;

	/* New sibling takes the upper half of the keys; same level and parent. */
	newsibling = FreePageBtreeGetRecycled(fpm);
	newsibling->hdr.magic = btp->hdr.magic;
	newsibling->hdr.nused = btp->hdr.nused / 2;
	relptr_copy(newsibling->hdr.parent, btp->hdr.parent);
	btp->hdr.nused -= newsibling->hdr.nused;

	if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
		memcpy(&newsibling->u.leaf_key,
			   &btp->u.leaf_key[btp->hdr.nused],
			   sizeof(FreePageBtreeLeafKey) * newsibling->hdr.nused);
	else
	{
		Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
		memcpy(&newsibling->u.internal_key,
			   &btp->u.internal_key[btp->hdr.nused],
			   sizeof(FreePageBtreeInternalKey) * newsibling->hdr.nused);
		/* Children moved to the new page must point back at it. */
		FreePageBtreeUpdateParentPointers(fpm_segment_base(fpm), newsibling);
	}

	return newsibling;
}
1226 : :
1227 : : /*
1228 : : * When internal pages are split or merged, the parent pointers of their
1229 : : * children must be updated.
1230 : : */
1231 : : static void
1232 : 0 : FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp)
1233 : : {
1234 : : Size i;
1235 : :
1236 [ # # ]: 0 : Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
1237 [ # # ]: 0 : for (i = 0; i < btp->hdr.nused; ++i)
1238 : : {
1239 : : FreePageBtree *child;
1240 : :
1241 [ # # ]: 0 : child = relptr_access(base, btp->u.internal_key[i].child);
1242 : 0 : relptr_store(base, child->hdr.parent, btp);
1243 : : }
1244 : 0 : }
1245 : :
/*
 * Debugging dump of btree data.
 *
 * Recursively appends one line per btree page to 'buf', flagging any page
 * whose stored parent pointer disagrees with the parent we descended from.
 */
static void
FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
						 FreePageBtree *parent, int level, StringInfo buf)
{
	char	   *base = fpm_segment_base(fpm);
	Size		pageno = fpm_pointer_to_page(base, btp);
	Size		index;
	FreePageBtree *check_parent;

	/* Recursion depth is bounded only by tree depth; guard the stack. */
	check_stack_depth();
	check_parent = relptr_access(base, btp->hdr.parent);
	appendStringInfo(buf, " %zu@%d %c", pageno, level,
					 btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ? 'i' : 'l');
	if (parent != check_parent)
		appendStringInfo(buf, " [actual parent %zu, expected %zu]",
						 fpm_pointer_to_page(base, check_parent),
						 fpm_pointer_to_page(base, parent));
	appendStringInfoChar(buf, ':');
	for (index = 0; index < btp->hdr.nused; ++index)
	{
		/* Internal keys print as key->childpage; leaf keys as key(npages). */
		if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
			appendStringInfo(buf, " %zu->%zu",
							 btp->u.internal_key[index].first_page,
							 relptr_offset(btp->u.internal_key[index].child) / FPM_PAGE_SIZE);
		else
			appendStringInfo(buf, " %zu(%zu)",
							 btp->u.leaf_key[index].first_page,
							 btp->u.leaf_key[index].npages);
	}
	appendStringInfoChar(buf, '\n');

	/* Recurse into children of internal pages. */
	if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
	{
		for (index = 0; index < btp->hdr.nused; ++index)
		{
			FreePageBtree *child;

			child = relptr_access(base, btp->u.internal_key[index].child);
			FreePageManagerDumpBtree(fpm, child, btp, level + 1, buf);
		}
	}
}
1291 : :
1292 : : /*
1293 : : * Debugging dump of free-span data.
1294 : : */
1295 : : static void
1296 : 0 : FreePageManagerDumpSpans(FreePageManager *fpm, FreePageSpanLeader *span,
1297 : : Size expected_pages, StringInfo buf)
1298 : : {
1299 : 0 : char *base = fpm_segment_base(fpm);
1300 : :
1301 [ # # ]: 0 : while (span != NULL)
1302 : : {
1303 [ # # ]: 0 : if (span->npages != expected_pages)
1304 : 0 : appendStringInfo(buf, " %zu(%zu)", fpm_pointer_to_page(base, span),
1305 : : span->npages);
1306 : : else
1307 : 0 : appendStringInfo(buf, " %zu", fpm_pointer_to_page(base, span));
1308 [ # # ]: 0 : span = relptr_access(base, span->next);
1309 : : }
1310 : :
2434 peter_e@gmx.net 1311 : 0 : appendStringInfoChar(buf, '\n');
2690 rhaas@postgresql.org 1312 : 0 : }
1313 : :
/*
 * This function allocates a run of pages of the given length from the free
 * page manager.
 *
 * On success, stores the first page of the allocated run in *first_page and
 * returns true; returns false if no span large enough is available.
 */
static bool
FreePageManagerGetInternal(FreePageManager *fpm, Size npages, Size *first_page)
{
	char	   *base = fpm_segment_base(fpm);
	FreePageSpanLeader *victim = NULL;
	FreePageSpanLeader *prev;
	FreePageSpanLeader *next;
	FreePageBtreeSearchResult result;
	Size		victim_page = 0;	/* placate compiler */
	Size		f;

	/*
	 * Search for a free span.
	 *
	 * Right now, we use a simple best-fit policy here, but it's possible for
	 * this to result in memory fragmentation if we're repeatedly asked to
	 * allocate chunks just a little smaller than what we have available.
	 * Hopefully, this is unlikely, because we expect most requests to be
	 * single pages or superblock-sized chunks -- but no policy can be optimal
	 * under all circumstances unless it has knowledge of future allocation
	 * patterns.
	 */
	for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
	{
		/* Skip empty freelists. */
		if (relptr_is_null(fpm->freelist[f]))
			continue;

		/*
		 * All of the freelists except the last one contain only items of a
		 * single size, so we just take the first one.  But the final free
		 * list contains everything too big for any of the other lists, so we
		 * need to search the list.
		 */
		if (f < FPM_NUM_FREELISTS - 1)
			victim = relptr_access(base, fpm->freelist[f]);
		else
		{
			FreePageSpanLeader *candidate;

			/* Best fit: smallest span that is still >= npages wins. */
			candidate = relptr_access(base, fpm->freelist[f]);
			do
			{
				if (candidate->npages >= npages && (victim == NULL ||
													victim->npages > candidate->npages))
				{
					victim = candidate;
					if (victim->npages == npages)
						break;	/* exact fit; can't do better */
				}
				candidate = relptr_access(base, candidate->next);
			} while (candidate != NULL);
		}
		break;
	}

	/* If we didn't find an allocatable span, return failure. */
	if (victim == NULL)
		return false;

	/* Remove span from free list. */
	Assert(victim->magic == FREE_PAGE_SPAN_LEADER_MAGIC);
	prev = relptr_access(base, victim->prev);
	next = relptr_access(base, victim->next);
	if (prev != NULL)
		relptr_copy(prev->next, victim->next);
	else
		relptr_copy(fpm->freelist[f], victim->next);
	if (next != NULL)
		relptr_copy(next->prev, victim->prev);
	victim_page = fpm_pointer_to_page(base, victim);

	/* Decide whether we might be invalidating contiguous_pages. */
	if (f == FPM_NUM_FREELISTS - 1 &&
		victim->npages == fpm->contiguous_pages)
	{
		/*
		 * The victim span came from the oversized freelist, and had the same
		 * size as the longest span.  There may or may not be another one of
		 * the same size, so contiguous_pages must be recomputed just to be
		 * safe.
		 */
		fpm->contiguous_pages_dirty = true;
	}
	else if (f + 1 == fpm->contiguous_pages &&
			 relptr_is_null(fpm->freelist[f]))
	{
		/*
		 * The victim span came from a fixed sized freelist, and it was the
		 * list for spans of the same size as the current longest span, and
		 * the list is now empty after removing the victim.  So
		 * contiguous_pages must be recomputed without a doubt.
		 */
		fpm->contiguous_pages_dirty = true;
	}

	/*
	 * If we haven't initialized the btree yet, the victim must be the single
	 * span stored within the FreePageManager itself.  Otherwise, we need to
	 * update the btree.
	 */
	if (relptr_is_null(fpm->btree_root))
	{
		Assert(victim_page == fpm->singleton_first_page);
		Assert(victim->npages == fpm->singleton_npages);
		Assert(victim->npages >= npages);
		/* Carve the allocation off the front of the singleton span. */
		fpm->singleton_first_page += npages;
		fpm->singleton_npages -= npages;
		if (fpm->singleton_npages > 0)
			FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
								   fpm->singleton_npages);
	}
	else
	{
		/*
		 * If the span we found is exactly the right size, remove it from the
		 * btree completely.  Otherwise, adjust the btree entry to reflect the
		 * still-unallocated portion of the span, and put that portion on the
		 * appropriate free list.
		 */
		FreePageBtreeSearch(fpm, victim_page, &result);
		Assert(result.found);
		if (victim->npages == npages)
			FreePageBtreeRemove(fpm, result.page, result.index);
		else
		{
			FreePageBtreeLeafKey *key;

			/* Adjust btree to reflect remaining pages. */
			Assert(victim->npages > npages);
			key = &result.page->u.leaf_key[result.index];
			Assert(key->npages == victim->npages);
			key->first_page += npages;
			key->npages -= npages;
			if (result.index == 0)
				FreePageBtreeAdjustAncestorKeys(fpm, result.page);

			/* Put the unallocated pages back on the appropriate free list. */
			FreePagePushSpanLeader(fpm, victim_page + npages,
								   victim->npages - npages);
		}
	}

	/* Return results to caller. */
	*first_page = fpm_pointer_to_page(base, victim);
	return true;
}
1465 : :
1466 : : /*
1467 : : * Put a range of pages into the btree and freelists, consolidating it with
1468 : : * existing free spans just before and/or after it. If 'soft' is true,
1469 : : * only perform the insertion if it can be done without allocating new btree
1470 : : * pages; if false, do it always. Returns 0 if the soft flag caused the
1471 : : * insertion to be skipped, or otherwise the size of the contiguous span
1472 : : * created by the insertion. This may be larger than npages if we're able
1473 : : * to consolidate with an adjacent range.
1474 : : */
1475 : : static Size
1476 : 4588 : FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
1477 : : bool soft)
1478 : : {
1479 : 4588 : char *base = fpm_segment_base(fpm);
1480 : : FreePageBtreeSearchResult result;
1481 : 4588 : FreePageBtreeLeafKey *prevkey = NULL;
1482 : 4588 : FreePageBtreeLeafKey *nextkey = NULL;
1483 : : FreePageBtree *np;
1484 : : Size nindex;
1485 : :
1486 [ - + ]: 4588 : Assert(npages > 0);
1487 : :
1488 : : /* We can store a single free span without initializing the btree. */
1489 [ + + ]: 4588 : if (fpm->btree_depth == 0)
1490 : : {
1491 [ + + ]: 3032 : if (fpm->singleton_npages == 0)
1492 : : {
1493 : : /* Don't have a span yet; store this one. */
1494 : 1961 : fpm->singleton_first_page = first_page;
1495 : 1961 : fpm->singleton_npages = npages;
1496 : 1961 : FreePagePushSpanLeader(fpm, first_page, npages);
1497 : 1961 : return fpm->singleton_npages;
1498 : : }
1499 [ + + ]: 1071 : else if (fpm->singleton_first_page + fpm->singleton_npages ==
1500 : : first_page)
1501 : : {
1502 : : /* New span immediately follows sole existing span. */
1503 : 5 : fpm->singleton_npages += npages;
1504 : 5 : FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
1505 : 5 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1506 : : fpm->singleton_npages);
1507 : 5 : return fpm->singleton_npages;
1508 : : }
1509 [ + + ]: 1066 : else if (first_page + npages == fpm->singleton_first_page)
1510 : : {
1511 : : /* New span immediately precedes sole existing span. */
1512 : 595 : FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
1513 : 595 : fpm->singleton_first_page = first_page;
1514 : 595 : fpm->singleton_npages += npages;
1515 : 595 : FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
1516 : : fpm->singleton_npages);
1517 : 595 : return fpm->singleton_npages;
1518 : : }
1519 : : else
1520 : : {
1521 : : /* Not contiguous; we need to initialize the btree. */
1522 : : Size root_page;
1523 : : FreePageBtree *root;
1524 : :
1525 [ + + ]: 471 : if (!relptr_is_null(fpm->btree_recycle))
1526 : 86 : root = FreePageBtreeGetRecycled(fpm);
1887 tmunro@postgresql.or 1527 [ + + ]: 385 : else if (soft)
1528 : 244 : return 0; /* Should not allocate if soft. */
2690 rhaas@postgresql.org 1529 [ + - ]: 157 : else if (FreePageManagerGetInternal(fpm, 1, &root_page))
1530 : 157 : root = (FreePageBtree *) fpm_page_to_pointer(base, root_page);
1531 : : else
1532 : : {
1533 : : /* We'd better be able to get a page from the existing range. */
2690 rhaas@postgresql.org 1534 [ # # ]:UBC 0 : elog(FATAL, "free page manager btree is corrupt");
1535 : : }
1536 : :
1537 : : /* Create the btree and move the preexisting range into it. */
2690 rhaas@postgresql.org 1538 :CBC 243 : root->hdr.magic = FREE_PAGE_LEAF_MAGIC;
1539 : 243 : root->hdr.nused = 1;
1540 : 243 : relptr_store(base, root->hdr.parent, (FreePageBtree *) NULL);
1541 : 243 : root->u.leaf_key[0].first_page = fpm->singleton_first_page;
1542 : 243 : root->u.leaf_key[0].npages = fpm->singleton_npages;
1543 : 243 : relptr_store(base, fpm->btree_root, root);
1544 : 243 : fpm->singleton_first_page = 0;
1545 : 243 : fpm->singleton_npages = 0;
1546 : 243 : fpm->btree_depth = 1;
1547 : :
1548 : : /*
1549 : : * Corner case: it may be that the btree root took the very last
1550 : : * free page. In that case, the sole btree entry covers a zero
1551 : : * page run, which is invalid. Overwrite it with the entry we're
1552 : : * trying to insert and get out.
1553 : : */
1554 [ + + ]: 243 : if (root->u.leaf_key[0].npages == 0)
1555 : : {
1556 : 16 : root->u.leaf_key[0].first_page = first_page;
1557 : 16 : root->u.leaf_key[0].npages = npages;
1558 : 16 : FreePagePushSpanLeader(fpm, first_page, npages);
1559 : 16 : return npages;
1560 : : }
1561 : :
1562 : : /* Fall through to insert the new key. */
1563 : : }
1564 : : }
1565 : :
1566 : : /* Search the btree. */
1567 : 1783 : FreePageBtreeSearch(fpm, first_page, &result);
1568 [ - + ]: 1783 : Assert(!result.found);
1569 [ + + ]: 1783 : if (result.index > 0)
1570 : 1081 : prevkey = &result.page->u.leaf_key[result.index - 1];
1571 [ + + ]: 1783 : if (result.index < result.page->hdr.nused)
1572 : : {
1573 : 1778 : np = result.page;
1574 : 1778 : nindex = result.index;
1575 : 1778 : nextkey = &result.page->u.leaf_key[result.index];
1576 : : }
1577 : : else
1578 : : {
1579 : 5 : np = FreePageBtreeFindRightSibling(base, result.page);
1580 : 5 : nindex = 0;
1581 [ - + ]: 5 : if (np != NULL)
2690 rhaas@postgresql.org 1582 :UBC 0 : nextkey = &np->u.leaf_key[0];
1583 : : }
1584 : :
1585 : : /* Consolidate with the previous entry if possible. */
2690 rhaas@postgresql.org 1586 [ + + + + ]:CBC 1783 : if (prevkey != NULL && prevkey->first_page + prevkey->npages >= first_page)
1587 : : {
1588 : 251 : bool remove_next = false;
1589 : : Size result;
1590 : :
1591 [ - + ]: 251 : Assert(prevkey->first_page + prevkey->npages == first_page);
1592 : 251 : prevkey->npages = (first_page - prevkey->first_page) + npages;
1593 : :
1594 : : /* Check whether we can *also* consolidate with the following entry. */
1595 [ + + ]: 251 : if (nextkey != NULL &&
1596 [ + + ]: 246 : prevkey->first_page + prevkey->npages >= nextkey->first_page)
1597 : : {
1598 [ - + ]: 120 : Assert(prevkey->first_page + prevkey->npages ==
1599 : : nextkey->first_page);
1600 : 120 : prevkey->npages = (nextkey->first_page - prevkey->first_page)
1601 : 120 : + nextkey->npages;
1602 : 120 : FreePagePopSpanLeader(fpm, nextkey->first_page);
1603 : 120 : remove_next = true;
1604 : : }
1605 : :
1606 : : /* Put the span on the correct freelist and save size. */
1607 : 251 : FreePagePopSpanLeader(fpm, prevkey->first_page);
1608 : 251 : FreePagePushSpanLeader(fpm, prevkey->first_page, prevkey->npages);
1609 : 251 : result = prevkey->npages;
1610 : :
1611 : : /*
1612 : : * If we consolidated with both the preceding and following entries,
1613 : : * we must remove the following entry. We do this last, because
1614 : : * removing an element from the btree may invalidate pointers we hold
1615 : : * into the current data structure.
1616 : : *
 1617 : : * NB: The btree is technically in an invalid state at this point
1618 : : * because we've already updated prevkey to cover the same key space
1619 : : * as nextkey. FreePageBtreeRemove() shouldn't notice that, though.
1620 : : */
1621 [ + + ]: 251 : if (remove_next)
1622 : 120 : FreePageBtreeRemove(fpm, np, nindex);
1623 : :
1624 : 251 : return result;
1625 : : }
1626 : :
1627 : : /* Consolidate with the next entry if possible. */
1628 [ + - + + ]: 1532 : if (nextkey != NULL && first_page + npages >= nextkey->first_page)
1629 : : {
1630 : : Size newpages;
1631 : :
1632 : : /* Compute new size for span. */
1633 [ - + ]: 803 : Assert(first_page + npages == nextkey->first_page);
1634 : 803 : newpages = (nextkey->first_page - first_page) + nextkey->npages;
1635 : :
1636 : : /* Put span on correct free list. */
1637 : 803 : FreePagePopSpanLeader(fpm, nextkey->first_page);
1638 : 803 : FreePagePushSpanLeader(fpm, first_page, newpages);
1639 : :
1640 : : /* Update key in place. */
1641 : 803 : nextkey->first_page = first_page;
1642 : 803 : nextkey->npages = newpages;
1643 : :
1644 : : /* If reducing first key on page, ancestors might need adjustment. */
1645 [ + + ]: 803 : if (nindex == 0)
1646 : 341 : FreePageBtreeAdjustAncestorKeys(fpm, np);
1647 : :
1648 : 803 : return nextkey->npages;
1649 : : }
1650 : :
1651 : : /* Split leaf page and as many of its ancestors as necessary. */
1652 [ - + ]: 729 : if (result.split_pages > 0)
1653 : : {
1654 : : /*
1655 : : * NB: We could consider various coping strategies here to avoid a
1656 : : * split; most obviously, if np != result.page, we could target that
1657 : : * page instead. More complicated shuffling strategies could be
1658 : : * possible as well; basically, unless every single leaf page is 100%
1659 : : * full, we can jam this key in there if we try hard enough. It's
1660 : : * unlikely that trying that hard is worthwhile, but it's possible we
1661 : : * might need to make more than no effort. For now, we just do the
1662 : : * easy thing, which is nothing.
1663 : : */
1664 : :
1665 : : /* If this is a soft insert, it's time to give up. */
2690 rhaas@postgresql.org 1666 [ # # ]:UBC 0 : if (soft)
1667 : 0 : return 0;
1668 : :
1669 : : /* Check whether we need to allocate more btree pages to split. */
1670 [ # # ]: 0 : if (result.split_pages > fpm->btree_recycle_count)
1671 : : {
1672 : : Size pages_needed;
1673 : : Size recycle_page;
1674 : : Size i;
1675 : :
1676 : : /*
1677 : : * Allocate the required number of pages and split each one in
1678 : : * turn. This should never fail, because if we've got enough
1679 : : * spans of free pages kicking around that we need additional
1680 : : * storage space just to remember them all, then we should
1681 : : * certainly have enough to expand the btree, which should only
1682 : : * ever use a tiny number of pages compared to the number under
1683 : : * management. If it does, something's badly screwed up.
1684 : : */
1685 : 0 : pages_needed = result.split_pages - fpm->btree_recycle_count;
1686 [ # # ]: 0 : for (i = 0; i < pages_needed; ++i)
1687 : : {
1688 [ # # ]: 0 : if (!FreePageManagerGetInternal(fpm, 1, &recycle_page))
1689 [ # # ]: 0 : elog(FATAL, "free page manager btree is corrupt");
1690 : 0 : FreePageBtreeRecycle(fpm, recycle_page);
1691 : : }
1692 : :
1693 : : /*
1694 : : * The act of allocating pages to recycle may have invalidated the
1695 : : * results of our previous btree research, so repeat it. (We could
1696 : : * recheck whether any of our split-avoidance strategies that were
1697 : : * not viable before now are, but it hardly seems worthwhile, so
1698 : : * we don't bother. Consolidation can't be possible now if it
1699 : : * wasn't previously.)
1700 : : */
1701 : 0 : FreePageBtreeSearch(fpm, first_page, &result);
1702 : :
1703 : : /*
1704 : : * The act of allocating pages for use in constructing our btree
1705 : : * should never cause any page to become more full, so the new
1706 : : * split depth should be no greater than the old one, and perhaps
1707 : : * less if we fortuitously allocated a chunk that freed up a slot
1708 : : * on the page we need to update.
1709 : : */
1710 [ # # ]: 0 : Assert(result.split_pages <= fpm->btree_recycle_count);
1711 : : }
1712 : :
1713 : : /* If we still need to perform a split, do it. */
1714 [ # # ]: 0 : if (result.split_pages > 0)
1715 : : {
1716 : 0 : FreePageBtree *split_target = result.page;
1717 : 0 : FreePageBtree *child = NULL;
1718 : 0 : Size key = first_page;
1719 : :
1720 : : for (;;)
1721 : 0 : {
1722 : : FreePageBtree *newsibling;
1723 : : FreePageBtree *parent;
1724 : :
1725 : : /* Identify parent page, which must receive downlink. */
1726 [ # # ]: 0 : parent = relptr_access(base, split_target->hdr.parent);
1727 : :
1728 : : /* Split the page - downlink not added yet. */
1729 : 0 : newsibling = FreePageBtreeSplitPage(fpm, split_target);
1730 : :
1731 : : /*
1732 : : * At this point in the loop, we're always carrying a pending
1733 : : * insertion. On the first pass, it's the actual key we're
1734 : : * trying to insert; on subsequent passes, it's the downlink
1735 : : * that needs to be added as a result of the split performed
1736 : : * during the previous loop iteration. Since we've just split
1737 : : * the page, there's definitely room on one of the two
1738 : : * resulting pages.
1739 : : */
1740 [ # # ]: 0 : if (child == NULL)
1741 : : {
1742 : : Size index;
1743 : : FreePageBtree *insert_into;
1744 : :
1745 : 0 : insert_into = key < newsibling->u.leaf_key[0].first_page ?
1746 [ # # ]: 0 : split_target : newsibling;
1747 : 0 : index = FreePageBtreeSearchLeaf(insert_into, key);
1748 : 0 : FreePageBtreeInsertLeaf(insert_into, index, key, npages);
1749 [ # # # # ]: 0 : if (index == 0 && insert_into == split_target)
1750 : 0 : FreePageBtreeAdjustAncestorKeys(fpm, split_target);
1751 : : }
1752 : : else
1753 : : {
1754 : : Size index;
1755 : : FreePageBtree *insert_into;
1756 : :
1757 : 0 : insert_into =
1758 : 0 : key < newsibling->u.internal_key[0].first_page ?
1759 [ # # ]: 0 : split_target : newsibling;
1760 : 0 : index = FreePageBtreeSearchInternal(insert_into, key);
1761 : 0 : FreePageBtreeInsertInternal(base, insert_into, index,
1762 : : key, child);
1763 : 0 : relptr_store(base, child->hdr.parent, insert_into);
1764 [ # # # # ]: 0 : if (index == 0 && insert_into == split_target)
1765 : 0 : FreePageBtreeAdjustAncestorKeys(fpm, split_target);
1766 : : }
1767 : :
1768 : : /* If the page we just split has no parent, split the root. */
1769 [ # # ]: 0 : if (parent == NULL)
1770 : : {
1771 : : FreePageBtree *newroot;
1772 : :
1773 : 0 : newroot = FreePageBtreeGetRecycled(fpm);
1774 : 0 : newroot->hdr.magic = FREE_PAGE_INTERNAL_MAGIC;
1775 : 0 : newroot->hdr.nused = 2;
1776 : 0 : relptr_store(base, newroot->hdr.parent,
1777 : : (FreePageBtree *) NULL);
1778 : 0 : newroot->u.internal_key[0].first_page =
1779 : 0 : FreePageBtreeFirstKey(split_target);
1780 : 0 : relptr_store(base, newroot->u.internal_key[0].child,
1781 : : split_target);
1782 : 0 : relptr_store(base, split_target->hdr.parent, newroot);
1783 : 0 : newroot->u.internal_key[1].first_page =
1784 : 0 : FreePageBtreeFirstKey(newsibling);
1785 : 0 : relptr_store(base, newroot->u.internal_key[1].child,
1786 : : newsibling);
1787 : 0 : relptr_store(base, newsibling->hdr.parent, newroot);
1788 : 0 : relptr_store(base, fpm->btree_root, newroot);
1789 : 0 : fpm->btree_depth++;
1790 : :
1791 : 0 : break;
1792 : : }
1793 : :
1794 : : /* If the parent page isn't full, insert the downlink. */
1795 : 0 : key = newsibling->u.internal_key[0].first_page;
1796 [ # # ]: 0 : if (parent->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE)
1797 : : {
1798 : : Size index;
1799 : :
1800 : 0 : index = FreePageBtreeSearchInternal(parent, key);
1801 : 0 : FreePageBtreeInsertInternal(base, parent, index,
1802 : : key, newsibling);
1803 : 0 : relptr_store(base, newsibling->hdr.parent, parent);
1804 [ # # ]: 0 : if (index == 0)
1805 : 0 : FreePageBtreeAdjustAncestorKeys(fpm, parent);
1806 : 0 : break;
1807 : : }
1808 : :
1809 : : /* The parent also needs to be split, so loop around. */
1810 : 0 : child = newsibling;
1811 : 0 : split_target = parent;
1812 : : }
1813 : :
1814 : : /*
1815 : : * The loop above did the insert, so just need to update the free
1816 : : * list, and we're done.
1817 : : */
1818 : 0 : FreePagePushSpanLeader(fpm, first_page, npages);
1819 : :
1820 : 0 : return npages;
1821 : : }
1822 : : }
1823 : :
1824 : : /* Physically add the key to the page. */
2690 rhaas@postgresql.org 1825 [ - + ]:CBC 729 : Assert(result.page->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
1826 : 729 : FreePageBtreeInsertLeaf(result.page, result.index, first_page, npages);
1827 : :
1828 : : /* If new first key on page, ancestors might need adjustment. */
1829 [ + + ]: 729 : if (result.index == 0)
1830 : 361 : FreePageBtreeAdjustAncestorKeys(fpm, result.page);
1831 : :
1832 : : /* Put it on the free list. */
1833 : 729 : FreePagePushSpanLeader(fpm, first_page, npages);
1834 : :
1835 : 729 : return npages;
1836 : : }
1837 : :
1838 : : /*
1839 : : * Remove a FreePageSpanLeader from the linked-list that contains it, either
1840 : : * because we're changing the size of the span, or because we're allocating it.
1841 : : */
1842 : : static void
1843 : 1858 : FreePagePopSpanLeader(FreePageManager *fpm, Size pageno)
1844 : : {
1845 : 1858 : char *base = fpm_segment_base(fpm);
1846 : : FreePageSpanLeader *span;
1847 : : FreePageSpanLeader *next;
1848 : : FreePageSpanLeader *prev;
1849 : :
1850 : 1858 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
1851 : :
1852 [ + + ]: 1858 : next = relptr_access(base, span->next);
1853 [ + + ]: 1858 : prev = relptr_access(base, span->prev);
1854 [ + + ]: 1858 : if (next != NULL)
1855 : 136 : relptr_copy(next->prev, span->prev);
1856 [ + + ]: 1858 : if (prev != NULL)
1857 : 56 : relptr_copy(prev->next, span->next);
1858 : : else
1859 : : {
1860 : 1802 : Size f = Min(span->npages, FPM_NUM_FREELISTS) - 1;
1861 : :
657 tmunro@postgresql.or 1862 [ - + ]: 1802 : Assert(relptr_offset(fpm->freelist[f]) == pageno * FPM_PAGE_SIZE);
2690 rhaas@postgresql.org 1863 : 1802 : relptr_copy(fpm->freelist[f], span->next);
1864 : : }
1865 : 1858 : }
1866 : :
1867 : : /*
1868 : : * Initialize a new FreePageSpanLeader and put it on the appropriate free list.
1869 : : */
1870 : : static void
1871 : 15112 : FreePagePushSpanLeader(FreePageManager *fpm, Size first_page, Size npages)
1872 : : {
1873 : 15112 : char *base = fpm_segment_base(fpm);
1874 : 15112 : Size f = Min(npages, FPM_NUM_FREELISTS) - 1;
1875 [ + + ]: 15112 : FreePageSpanLeader *head = relptr_access(base, fpm->freelist[f]);
1876 : : FreePageSpanLeader *span;
1877 : :
1878 : 15112 : span = (FreePageSpanLeader *) fpm_page_to_pointer(base, first_page);
1879 : 15112 : span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
1880 : 15112 : span->npages = npages;
1881 : 15112 : relptr_store(base, span->next, head);
1882 : 15112 : relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
1883 [ + + ]: 15112 : if (head != NULL)
1884 : 310 : relptr_store(base, head->prev, span);
1885 : 15112 : relptr_store(base, fpm->freelist[f], span);
1886 : 15112 : }
|