LCOV - differential code coverage report
Current view: top level - src/backend/utils/mmgr - freepage.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 55.1 % 670 369 301 369
Current Date: 2023-04-08 15:15:32 Functions: 72.4 % 29 21 8 21
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * freepage.c
       4                 :  *    Management of free memory pages.
       5                 :  *
       6                 :  * The intention of this code is to provide infrastructure for memory
       7                 :  * allocators written specifically for PostgreSQL.  At least in the case
       8                 :  * of dynamic shared memory, we can't simply use malloc() or even
       9                 :  * relatively thin wrappers like palloc() which sit on top of it, because
      10                 :  * no allocator built into the operating system will deal with relative
      11                 :  * pointers.  In the future, we may find other cases in which greater
      12                 :  * control over our own memory management seems desirable.
      13                 :  *
      14                 :  * A FreePageManager keeps track of which 4kB pages of memory are currently
      15                 :  * unused from the point of view of some higher-level memory allocator.
      16                 :  * Unlike a user-facing allocator such as palloc(), a FreePageManager can
      17                 :  * only allocate and free in units of whole pages, and freeing an
      18                 :  * allocation can only be done given knowledge of its length in pages.
      19                 :  *
      20                 :  * Since a free page manager has only a fixed amount of dedicated memory,
      21                 :  * and since there is no underlying allocator, it uses the free pages
      22                 :  * it is given to manage to store its bookkeeping data.  It keeps multiple
      23                 :  * freelists of runs of pages, sorted by the size of the run; the head of
      24                 :  * each freelist is stored in the FreePageManager itself, and the first
      25                 :  * page of each run contains a relative pointer to the next run. See
      26                 :  * FreePageManagerGetInternal for more details on how the freelists are
      27                 :  * managed.
      28                 :  *
      29                 :  * To avoid memory fragmentation, it's important to consolidate adjacent
      30                 :  * spans of pages whenever possible; otherwise, large allocation requests
      31                 :  * might not be satisfied even when sufficient contiguous space is
      32                 :  * available.  Therefore, in addition to the freelists, we maintain an
      33                 :  * in-memory btree of free page ranges ordered by page number.  If a
      34                 :  * range being freed precedes or follows a range that is already free,
      35                 :  * the existing range is extended; if it exactly bridges the gap between
      36                 :  * free ranges, then the two existing ranges are consolidated with the
      37                 :  * newly-freed range to form one great big range of free pages.
      38                 :  *
      39                 :  * When there is only one range of free pages, the btree is trivial and
      40                 :  * is stored within the FreePageManager proper; otherwise, pages are
      41                 :  * allocated from the area under management as needed.  Even in cases
      42                 :  * where memory fragmentation is very severe, only a tiny fraction of
      43                 :  * the pages under management are consumed by this btree.
      44                 :  *
      45                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      46                 :  * Portions Copyright (c) 1994, Regents of the University of California
      47                 :  *
      48                 :  * IDENTIFICATION
      49                 :  *    src/backend/utils/mmgr/freepage.c
      50                 :  *
      51                 :  *-------------------------------------------------------------------------
      52                 :  */
      53                 : 
      54                 : #include "postgres.h"
      55                 : #include "lib/stringinfo.h"
      56                 : #include "miscadmin.h"
      57                 : 
      58                 : #include "utils/freepage.h"
      59                 : #include "utils/relptr.h"
      60                 : 
      61                 : 
      62                 : /* Magic numbers to identify various page types */
      63                 : #define FREE_PAGE_SPAN_LEADER_MAGIC     0xea4020f0
      64                 : #define FREE_PAGE_LEAF_MAGIC            0x98eae728
      65                 : #define FREE_PAGE_INTERNAL_MAGIC        0x19aa32c9
      66                 : 
      67                 : /* Doubly linked list of spans of free pages; stored in first page of span. */
      68                 : struct FreePageSpanLeader
      69                 : {
      70                 :     int         magic;          /* always FREE_PAGE_SPAN_LEADER_MAGIC */
      71                 :     Size        npages;         /* number of pages in span */
      72                 :     RelptrFreePageSpanLeader prev;
      73                 :     RelptrFreePageSpanLeader next;
      74                 : };
      75                 : 
      76                 : /* Common header for btree leaf and internal pages. */
      77                 : typedef struct FreePageBtreeHeader
      78                 : {
      79                 :     int         magic;          /* FREE_PAGE_LEAF_MAGIC or
      80                 :                                  * FREE_PAGE_INTERNAL_MAGIC */
      81                 :     Size        nused;          /* number of items used */
      82                 :     RelptrFreePageBtree parent; /* uplink */
      83                 : } FreePageBtreeHeader;
      84                 : 
      85                 : /* Internal key; points to next level of btree. */
      86                 : typedef struct FreePageBtreeInternalKey
      87                 : {
      88                 :     Size        first_page;     /* low bound for keys on child page */
      89                 :     RelptrFreePageBtree child;  /* downlink */
      90                 : } FreePageBtreeInternalKey;
      91                 : 
      92                 : /* Leaf key; no payload data. */
      93                 : typedef struct FreePageBtreeLeafKey
      94                 : {
      95                 :     Size        first_page;     /* first page in span */
      96                 :     Size        npages;         /* number of pages in span */
      97                 : } FreePageBtreeLeafKey;
      98                 : 
      99                 : /* Work out how many keys will fit on a page. */
     100                 : #define FPM_ITEMS_PER_INTERNAL_PAGE \
     101                 :     ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
     102                 :         sizeof(FreePageBtreeInternalKey))
     103                 : #define FPM_ITEMS_PER_LEAF_PAGE \
     104                 :     ((FPM_PAGE_SIZE - sizeof(FreePageBtreeHeader)) / \
     105                 :         sizeof(FreePageBtreeLeafKey))
     106                 : 
     107                 : /* A btree page of either sort */
     108                 : struct FreePageBtree
     109                 : {
     110                 :     FreePageBtreeHeader hdr;
     111                 :     union
     112                 :     {
     113                 :         FreePageBtreeInternalKey internal_key[FPM_ITEMS_PER_INTERNAL_PAGE];
     114                 :         FreePageBtreeLeafKey leaf_key[FPM_ITEMS_PER_LEAF_PAGE];
     115                 :     }           u;
     116                 : };
     117                 : 
     118                 : /* Results of a btree search */
     119                 : typedef struct FreePageBtreeSearchResult
     120                 : {
     121                 :     FreePageBtree *page;
     122                 :     Size        index;
     123                 :     bool        found;
     124                 :     unsigned    split_pages;
     125                 : } FreePageBtreeSearchResult;
     126                 : 
     127                 : /* Helper functions */
     128                 : static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
     129                 :                                             FreePageBtree *btp);
     130                 : static Size FreePageBtreeCleanup(FreePageManager *fpm);
     131                 : static FreePageBtree *FreePageBtreeFindLeftSibling(char *base,
     132                 :                                                    FreePageBtree *btp);
     133                 : static FreePageBtree *FreePageBtreeFindRightSibling(char *base,
     134                 :                                                     FreePageBtree *btp);
     135                 : static Size FreePageBtreeFirstKey(FreePageBtree *btp);
     136                 : static FreePageBtree *FreePageBtreeGetRecycled(FreePageManager *fpm);
     137                 : static void FreePageBtreeInsertInternal(char *base, FreePageBtree *btp,
     138                 :                                         Size index, Size first_page, FreePageBtree *child);
     139                 : static void FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index,
     140                 :                                     Size first_page, Size npages);
     141                 : static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
     142                 : static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
     143                 :                                 Size index);
     144                 : static void FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp);
     145                 : static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
     146                 :                                 FreePageBtreeSearchResult *result);
     147                 : static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
     148                 : static Size FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page);
     149                 : static FreePageBtree *FreePageBtreeSplitPage(FreePageManager *fpm,
     150                 :                                              FreePageBtree *btp);
     151                 : static void FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp);
     152                 : static void FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
     153                 :                                      FreePageBtree *parent, int level, StringInfo buf);
     154                 : static void FreePageManagerDumpSpans(FreePageManager *fpm,
     155                 :                                      FreePageSpanLeader *span, Size expected_pages,
     156                 :                                      StringInfo buf);
     157                 : static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
     158                 :                                        Size *first_page);
     159                 : static Size FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
     160                 :                                        Size npages, bool soft);
     161                 : static void FreePagePopSpanLeader(FreePageManager *fpm, Size pageno);
     162                 : static void FreePagePushSpanLeader(FreePageManager *fpm, Size first_page,
     163                 :                                    Size npages);
     164                 : static Size FreePageManagerLargestContiguous(FreePageManager *fpm);
     165                 : static void FreePageManagerUpdateLargest(FreePageManager *fpm);
     166                 : 
     167                 : #ifdef FPM_EXTRA_ASSERTS
     168                 : static Size sum_free_pages(FreePageManager *fpm);
     169                 : #endif
     170                 : 
     171                 : /*
     172                 :  * Initialize a new, empty free page manager.
     173                 :  *
     174                 :  * 'fpm' should reference caller-provided memory large enough to contain a
     175                 :  * FreePageManager.  We'll initialize it here.
     176                 :  *
     177                 :  * 'base' is the address to which all pointers are relative.  When managing
     178                 :  * a dynamic shared memory segment, it should normally be the base of the
     179                 :  * segment.  When managing backend-private memory, it can be either NULL or,
     180                 :  * if managing a single contiguous extent of memory, the start of that extent.
     181                 :  */
     182                 : void
     183 CBC        3557 : FreePageManagerInitialize(FreePageManager *fpm, char *base)
     184                 : {
     185                 :     Size        f;
     186                 : 
     187            3557 :     relptr_store(base, fpm->self, fpm);
     188            3557 :     relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
     189            3557 :     relptr_store(base, fpm->btree_recycle, (FreePageSpanLeader *) NULL);
     190            3557 :     fpm->btree_depth = 0;
     191            3557 :     fpm->btree_recycle_count = 0;
     192            3557 :     fpm->singleton_first_page = 0;
     193            3557 :     fpm->singleton_npages = 0;
     194            3557 :     fpm->contiguous_pages = 0;
     195            3557 :     fpm->contiguous_pages_dirty = true;
     196                 : #ifdef FPM_EXTRA_ASSERTS
     197                 :     fpm->free_pages = 0;
     198                 : #endif
     199                 : 
     200          462410 :     for (f = 0; f < FPM_NUM_FREELISTS; f++)
     201          458853 :         relptr_store(base, fpm->freelist[f], (FreePageSpanLeader *) NULL);
     202            3557 : }
     203                 : 
     204                 : /*
     205                 :  * Allocate a run of pages of the given length from the free page manager.
     206                 :  * The return value indicates whether we were able to satisfy the request;
     207                 :  * if true, the first page of the allocation is stored in *first_page.
     208                 :  */
     209                 : bool
     210           15938 : FreePageManagerGet(FreePageManager *fpm, Size npages, Size *first_page)
     211                 : {
     212                 :     bool        result;
     213                 :     Size        contiguous_pages;
     214                 : 
     215           15938 :     result = FreePageManagerGetInternal(fpm, npages, first_page);
     216                 : 
     217                 :     /*
     218                 :      * It's a bit counterintuitive, but allocating pages can actually create
     219                 :      * opportunities for cleanup that create larger ranges.  We might pull a
     220                 :      * key out of the btree that enables the item at the head of the btree
     221                 :      * recycle list to be inserted; and then if there are more items behind it
     222                 :      * one of those might cause two currently-separated ranges to merge,
     223                 :      * creating a single range of contiguous pages larger than any that
     224                 :      * existed previously.  It might be worth trying to improve the cleanup
     225                 :      * algorithm to avoid such corner cases, but for now we just notice the
     226                 :      * condition and do the appropriate reporting.
     227                 :      */
     228           15938 :     contiguous_pages = FreePageBtreeCleanup(fpm);
     229           15938 :     if (fpm->contiguous_pages < contiguous_pages)
     230              42 :         fpm->contiguous_pages = contiguous_pages;
     231                 : 
     232                 :     /*
     233                 :      * FreePageManagerGetInternal may have set contiguous_pages_dirty.
     234                 :      * Recompute contiguous_pages if so.
     235                 :      */
     236           15938 :     FreePageManagerUpdateLargest(fpm);
     237                 : 
     238                 : #ifdef FPM_EXTRA_ASSERTS
     239                 :     if (result)
     240                 :     {
     241                 :         Assert(fpm->free_pages >= npages);
     242                 :         fpm->free_pages -= npages;
     243                 :     }
     244                 :     Assert(fpm->free_pages == sum_free_pages(fpm));
     245                 :     Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
     246                 : #endif
     247           15938 :     return result;
     248                 : }
     249                 : 
     250                 : #ifdef FPM_EXTRA_ASSERTS
     251                 : static void
     252                 : sum_free_pages_recurse(FreePageManager *fpm, FreePageBtree *btp, Size *sum)
     253                 : {
     254                 :     char       *base = fpm_segment_base(fpm);
     255                 : 
     256                 :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ||
     257                 :            btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     258                 :     ++*sum;
     259                 :     if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
     260                 :     {
     261                 :         Size        index;
     262                 : 
     263                 : 
     264                 :         for (index = 0; index < btp->hdr.nused; ++index)
     265                 :         {
     266                 :             FreePageBtree *child;
     267                 : 
     268                 :             child = relptr_access(base, btp->u.internal_key[index].child);
     269                 :             sum_free_pages_recurse(fpm, child, sum);
     270                 :         }
     271                 :     }
     272                 : }
     273                 : static Size
     274                 : sum_free_pages(FreePageManager *fpm)
     275                 : {
     276                 :     FreePageSpanLeader *recycle;
     277                 :     char       *base = fpm_segment_base(fpm);
     278                 :     Size        sum = 0;
     279                 :     int         list;
     280                 : 
     281                 :     /* Count the spans by scanning the freelists. */
     282                 :     for (list = 0; list < FPM_NUM_FREELISTS; ++list)
     283                 :     {
     284                 : 
     285                 :         if (!relptr_is_null(fpm->freelist[list]))
     286                 :         {
     287                 :             FreePageSpanLeader *candidate =
     288                 :             relptr_access(base, fpm->freelist[list]);
     289                 : 
     290                 :             do
     291                 :             {
     292                 :                 sum += candidate->npages;
     293                 :                 candidate = relptr_access(base, candidate->next);
     294                 :             } while (candidate != NULL);
     295                 :         }
     296                 :     }
     297                 : 
     298                 :     /* Count btree internal pages. */
     299                 :     if (fpm->btree_depth > 0)
     300                 :     {
     301                 :         FreePageBtree *root = relptr_access(base, fpm->btree_root);
     302                 : 
     303                 :         sum_free_pages_recurse(fpm, root, &sum);
     304                 :     }
     305                 : 
     306                 :     /* Count the recycle list. */
     307                 :     for (recycle = relptr_access(base, fpm->btree_recycle);
     308                 :          recycle != NULL;
     309                 :          recycle = relptr_access(base, recycle->next))
     310                 :     {
     311                 :         Assert(recycle->npages == 1);
     312                 :         ++sum;
     313                 :     }
     314                 : 
     315                 :     return sum;
     316                 : }
     317                 : #endif
     318                 : 
     319                 : /*
     320                 :  * Compute the size of the largest run of pages that the user could
     321                 :  * successfully get.
     322                 :  */
     323                 : static Size
     324           18419 : FreePageManagerLargestContiguous(FreePageManager *fpm)
     325                 : {
     326                 :     char       *base;
     327                 :     Size        largest;
     328                 : 
     329           18419 :     base = fpm_segment_base(fpm);
     330           18419 :     largest = 0;
     331           18419 :     if (!relptr_is_null(fpm->freelist[FPM_NUM_FREELISTS - 1]))
     332                 :     {
     333                 :         FreePageSpanLeader *candidate;
     334                 : 
     335            8466 :         candidate = relptr_access(base, fpm->freelist[FPM_NUM_FREELISTS - 1]);
     336                 :         do
     337                 :         {
     338            8466 :             if (candidate->npages > largest)
     339            8466 :                 largest = candidate->npages;
     340            8466 :             candidate = relptr_access(base, candidate->next);
     341            8466 :         } while (candidate != NULL);
     342                 :     }
     343                 :     else
     344                 :     {
     345            9953 :         Size        f = FPM_NUM_FREELISTS - 1;
     346                 : 
     347                 :         do
     348                 :         {
     349          856789 :             --f;
     350          856789 :             if (!relptr_is_null(fpm->freelist[f]))
     351                 :             {
     352            9953 :                 largest = f + 1;
     353            9953 :                 break;
     354                 :             }
     355          846836 :         } while (f > 0);
     356                 :     }
     357                 : 
     358           18419 :     return largest;
     359                 : }
     360                 : 
     361                 : /*
     362                 :  * Recompute the size of the largest run of pages that the user could
     363                 :  * successfully get, if it has been marked dirty.
     364                 :  */
     365                 : static void
     366           21403 : FreePageManagerUpdateLargest(FreePageManager *fpm)
     367                 : {
     368           21403 :     if (!fpm->contiguous_pages_dirty)
     369            2984 :         return;
     370                 : 
     371           18419 :     fpm->contiguous_pages = FreePageManagerLargestContiguous(fpm);
     372           18419 :     fpm->contiguous_pages_dirty = false;
     373                 : }
     374                 : 
     375                 : /*
     376                 :  * Transfer a run of pages to the free page manager.
     377                 :  */
     378                 : void
     379            5465 : FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
     380                 : {
     381                 :     Size        contiguous_pages;
     382                 : 
     383            5465 :     Assert(npages > 0);
     384                 : 
     385                 :     /* Record the new pages. */
     386                 :     contiguous_pages =
     387            5465 :         FreePageManagerPutInternal(fpm, first_page, npages, false);
     388                 : 
     389                 :     /*
     390                 :      * If the new range we inserted into the page manager was contiguous with
     391                 :      * an existing range, it may have opened up cleanup opportunities.
     392                 :      */
     393            5465 :     if (contiguous_pages > npages)
     394                 :     {
     395                 :         Size        cleanup_contiguous_pages;
     396                 : 
     397            1612 :         cleanup_contiguous_pages = FreePageBtreeCleanup(fpm);
     398            1612 :         if (cleanup_contiguous_pages > contiguous_pages)
     399              58 :             contiguous_pages = cleanup_contiguous_pages;
     400                 :     }
     401                 : 
     402                 :     /* See if we now have a new largest chunk. */
     403            5465 :     if (fpm->contiguous_pages < contiguous_pages)
     404            3976 :         fpm->contiguous_pages = contiguous_pages;
     405                 : 
     406                 :     /*
     407                 :      * The earlier call to FreePageManagerPutInternal may have set
     408                 :      * contiguous_pages_dirty if it needed to allocate internal pages, so
     409                 :      * recompute contiguous_pages if necessary.
     410                 :      */
     411            5465 :     FreePageManagerUpdateLargest(fpm);
     412                 : 
     413                 : #ifdef FPM_EXTRA_ASSERTS
     414                 :     fpm->free_pages += npages;
     415                 :     Assert(fpm->free_pages == sum_free_pages(fpm));
     416                 :     Assert(fpm->contiguous_pages == FreePageManagerLargestContiguous(fpm));
     417                 : #endif
     418            5465 : }
     419                 : 
     420                 : /*
     421                 :  * Produce a debugging dump of the state of a free page manager.
     422                 :  */
     423                 : char *
     424 UBC           0 : FreePageManagerDump(FreePageManager *fpm)
     425                 : {
     426               0 :     char       *base = fpm_segment_base(fpm);
     427                 :     StringInfoData buf;
     428                 :     FreePageSpanLeader *recycle;
     429               0 :     bool        dumped_any_freelist = false;
     430                 :     Size        f;
     431                 : 
     432                 :     /* Initialize output buffer. */
     433               0 :     initStringInfo(&buf);
     434                 : 
     435                 :     /* Dump general stuff. */
     436               0 :     appendStringInfo(&buf, "metadata: self %zu max contiguous pages = %zu\n",
     437               0 :                      relptr_offset(fpm->self), fpm->contiguous_pages);
     438                 : 
     439                 :     /* Dump btree. */
     440               0 :     if (fpm->btree_depth > 0)
     441                 :     {
     442                 :         FreePageBtree *root;
     443                 : 
     444               0 :         appendStringInfo(&buf, "btree depth %u:\n", fpm->btree_depth);
     445               0 :         root = relptr_access(base, fpm->btree_root);
     446               0 :         FreePageManagerDumpBtree(fpm, root, NULL, 0, &buf);
     447                 :     }
     448               0 :     else if (fpm->singleton_npages > 0)
     449                 :     {
     450               0 :         appendStringInfo(&buf, "singleton: %zu(%zu)\n",
     451                 :                          fpm->singleton_first_page, fpm->singleton_npages);
     452                 :     }
     453                 : 
     454                 :     /* Dump btree recycle list. */
     455               0 :     recycle = relptr_access(base, fpm->btree_recycle);
     456               0 :     if (recycle != NULL)
     457                 :     {
     458               0 :         appendStringInfoString(&buf, "btree recycle:");
     459               0 :         FreePageManagerDumpSpans(fpm, recycle, 1, &buf);
     460                 :     }
     461                 : 
     462                 :     /* Dump free lists. */
     463               0 :     for (f = 0; f < FPM_NUM_FREELISTS; ++f)
     464                 :     {
     465                 :         FreePageSpanLeader *span;
     466                 : 
     467               0 :         if (relptr_is_null(fpm->freelist[f]))
     468               0 :             continue;
     469               0 :         if (!dumped_any_freelist)
     470                 :         {
     471               0 :             appendStringInfoString(&buf, "freelists:\n");
     472               0 :             dumped_any_freelist = true;
     473                 :         }
     474               0 :         appendStringInfo(&buf, "  %zu:", f + 1);
     475               0 :         span = relptr_access(base, fpm->freelist[f]);
     476               0 :         FreePageManagerDumpSpans(fpm, span, f + 1, &buf);
     477                 :     }
     478                 : 
     479                 :     /* And return result to caller. */
     480               0 :     return buf.data;
     481                 : }
     482                 : 
     483                 : 
     484                 : /*
     485                 :  * The first_page value stored at index zero in any non-root page must match
     486                 :  * the first_page value stored in its parent at the index which points to that
     487                 :  * page.  So when the value stored at index zero in a btree page changes, we've
     488                 :  * got to walk up the tree adjusting ancestor keys until we reach an ancestor
     489                 :  * where that key isn't index zero.  This function should be called after
     490                 :  * updating the first key on the target page; it will propagate the change
     491                 :  * upward as far as needed.
     492                 :  *
     493                 :  * We assume here that the first key on the page has not changed enough to
     494                 :  * require changes in the ordering of keys on its ancestor pages.  Thus,
     495                 :  * if we search the parent page for the first key greater than or equal to
     496                 :  * the first key on the current page, the downlink to this page will be either
     497                 :  * the exact index returned by the search (if the first key decreased)
     498                 :  * or one less (if the first key increased).
     499                 :  */
     500                 : static void
     501 CBC        1171 : FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
     502                 : {
     503            1171 :     char       *base = fpm_segment_base(fpm);
     504                 :     Size        first_page;
     505                 :     FreePageBtree *parent;
     506                 :     FreePageBtree *child;
     507                 : 
     508                 :     /* This might be either a leaf or an internal page. */
     509            1171 :     Assert(btp->hdr.nused > 0);
     510            1171 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     511                 :     {
     512            1171 :         Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
     513            1171 :         first_page = btp->u.leaf_key[0].first_page;
     514                 :     }
     515                 :     else
     516                 :     {
     517 UBC           0 :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     518               0 :         Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
     519               0 :         first_page = btp->u.internal_key[0].first_page;
     520                 :     }
     521 CBC        1171 :     child = btp;
     522                 : 
     523                 :     /* Loop until we find an ancestor that does not require adjustment. */
     524                 :     for (;;)
     525 UBC           0 :     {
     526                 :         Size        s;
     527                 : 
     528 CBC        1171 :         parent = relptr_access(base, child->hdr.parent);
     529            1171 :         if (parent == NULL)
     530            1171 :             break;
     531 UBC           0 :         s = FreePageBtreeSearchInternal(parent, first_page);
     532                 : 
     533                 :         /* Key is either at index s or index s-1; figure out which. */
     534               0 :         if (s >= parent->hdr.nused)
     535                 :         {
     536               0 :             Assert(s == parent->hdr.nused);
     537               0 :             --s;
     538                 :         }
     539                 :         else
     540                 :         {
     541                 :             FreePageBtree *check;
     542                 : 
     543               0 :             check = relptr_access(base, parent->u.internal_key[s].child);
     544               0 :             if (check != child)
     545                 :             {
     546               0 :                 Assert(s > 0);
     547               0 :                 --s;
     548                 :             }
     549                 :         }
     550                 : 
     551                 : #ifdef USE_ASSERT_CHECKING
     552                 :         /* Debugging double-check. */
     553                 :         {
     554                 :             FreePageBtree *check;
     555                 : 
     556               0 :             check = relptr_access(base, parent->u.internal_key[s].child);
     557               0 :             Assert(s < parent->hdr.nused);
     558               0 :             Assert(child == check);
     559                 :         }
     560                 : #endif
     561                 : 
     562                 :         /* Update the parent key. */
     563               0 :         parent->u.internal_key[s].first_page = first_page;
     564                 : 
     565                 :         /*
     566                 :          * If this is the first key in the parent, go up another level; else
     567                 :          * done.
     568                 :          */
     569               0 :         if (s > 0)
     570               0 :             break;
     571               0 :         child = parent;
     572                 :     }
     573 CBC        1171 : }
     574                 : 
     575                 : /*
     576                 :  * Attempt to reclaim space from the free-page btree.  The return value is
     577                 :  * the largest range of contiguous pages created by the cleanup operation.
     578                 :  */
     579                 : static Size
     580           17550 : FreePageBtreeCleanup(FreePageManager *fpm)
     581                 : {
     582           17550 :     char       *base = fpm_segment_base(fpm);
     583           17550 :     Size        max_contiguous_pages = 0;
     584                 : 
     585                 :     /* Attempt to shrink the depth of the btree. */
     586           17654 :     while (!relptr_is_null(fpm->btree_root))
     587                 :     {
     588            2247 :         FreePageBtree *root = relptr_access(base, fpm->btree_root);
     589                 : 
     590                 :         /* If the root contains only one key, reduce depth by one. */
     591            2247 :         if (root->hdr.nused == 1)
     592                 :         {
     593                 :             /* Shrink depth of tree by one. */
     594             104 :             Assert(fpm->btree_depth > 0);
     595             104 :             --fpm->btree_depth;
     596             104 :             if (root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     597                 :             {
     598                 :                 /* If root is a leaf, convert only entry to singleton range. */
     599             104 :                 relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
     600             104 :                 fpm->singleton_first_page = root->u.leaf_key[0].first_page;
     601             104 :                 fpm->singleton_npages = root->u.leaf_key[0].npages;
     602                 :             }
     603                 :             else
     604                 :             {
     605                 :                 FreePageBtree *newroot;
     606                 : 
     607                 :                 /* If root is an internal page, make only child the root. */
     608 UBC           0 :                 Assert(root->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     609               0 :                 relptr_copy(fpm->btree_root, root->u.internal_key[0].child);
     610               0 :                 newroot = relptr_access(base, fpm->btree_root);
     611               0 :                 relptr_store(base, newroot->hdr.parent, (FreePageBtree *) NULL);
     612                 :             }
     613 CBC         104 :             FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, root));
     614                 :         }
     615            2143 :         else if (root->hdr.nused == 2 &&
     616            1054 :                  root->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     617                 :         {
     618                 :             Size        end_of_first;
     619                 :             Size        start_of_second;
     620                 : 
     621            1054 :             end_of_first = root->u.leaf_key[0].first_page +
     622            1054 :                 root->u.leaf_key[0].npages;
     623            1054 :             start_of_second = root->u.leaf_key[1].first_page;
     624                 : 
     625            1054 :             if (end_of_first + 1 == start_of_second)
     626                 :             {
     627              58 :                 Size        root_page = fpm_pointer_to_page(base, root);
     628                 : 
     629              58 :                 if (end_of_first == root_page)
     630                 :                 {
     631              58 :                     FreePagePopSpanLeader(fpm, root->u.leaf_key[0].first_page);
     632              58 :                     FreePagePopSpanLeader(fpm, root->u.leaf_key[1].first_page);
     633              58 :                     fpm->singleton_first_page = root->u.leaf_key[0].first_page;
     634              58 :                     fpm->singleton_npages = root->u.leaf_key[0].npages +
     635              58 :                         root->u.leaf_key[1].npages + 1;
     636              58 :                     fpm->btree_depth = 0;
     637              58 :                     relptr_store(base, fpm->btree_root,
     638                 :                                  (FreePageBtree *) NULL);
     639              58 :                     FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
     640                 :                                            fpm->singleton_npages);
     641              58 :                     Assert(max_contiguous_pages == 0);
     642              58 :                     max_contiguous_pages = fpm->singleton_npages;
     643                 :                 }
     644                 :             }
     645                 : 
     646                 :             /* Whether it worked or not, it's time to stop. */
     647            1054 :             break;
     648                 :         }
     649                 :         else
     650                 :         {
     651                 :             /* Nothing more to do.  Stop. */
     652                 :             break;
     653                 :         }
     654                 :     }
     655                 : 
     656                 :     /*
     657                 :      * Attempt to free recycled btree pages.  We skip this if releasing the
     658                 :      * recycled page would require a btree page split, because the page we're
     659                 :      * trying to recycle would be consumed by the split, which would be
     660                 :      * counterproductive.
     661                 :      *
     662                 :      * We also currently only ever attempt to recycle the first page on the
     663                 :      * list; that could be made more aggressive, but it's not clear that the
     664                 :      * complexity would be worthwhile.
     665                 :      */
     666           17592 :     while (fpm->btree_recycle_count > 0)
     667                 :     {
     668                 :         FreePageBtree *btp;
     669                 :         Size        first_page;
     670                 :         Size        contiguous_pages;
     671                 : 
     672             135 :         btp = FreePageBtreeGetRecycled(fpm);
     673             135 :         first_page = fpm_pointer_to_page(base, btp);
     674             135 :         contiguous_pages = FreePageManagerPutInternal(fpm, first_page, 1, true);
     675             135 :         if (contiguous_pages == 0)
     676                 :         {
     677              93 :             FreePageBtreeRecycle(fpm, first_page);
     678              93 :             break;
     679                 :         }
     680                 :         else
     681                 :         {
     682              42 :             if (contiguous_pages > max_contiguous_pages)
     683              42 :                 max_contiguous_pages = contiguous_pages;
     684                 :         }
     685                 :     }
     686                 : 
     687           17550 :     return max_contiguous_pages;
     688                 : }
     689                 : 
     690                 : /*
     691                 :  * Consider consolidating the given page with its left or right sibling,
     692                 :  * if it's fairly empty.
     693                 :  */
     694                 : static void
     695             498 : FreePageBtreeConsolidate(FreePageManager *fpm, FreePageBtree *btp)
     696                 : {
     697             498 :     char       *base = fpm_segment_base(fpm);
     698                 :     FreePageBtree *np;
     699                 :     Size        max;
     700                 : 
     701                 :     /*
     702                 :      * We only try to consolidate pages that are less than a third full. We
     703                 :      * could be more aggressive about this, but that might risk performing
     704                 :      * consolidation only to end up splitting again shortly thereafter.  Since
     705                 :      * the btree should be very small compared to the space under management,
     706                 :      * our goal isn't so much to ensure that it always occupies the absolutely
     707                 :      * smallest possible number of pages as to reclaim pages before things get
     708                 :      * too egregiously out of hand.
     709                 :      */
     710             498 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     711             498 :         max = FPM_ITEMS_PER_LEAF_PAGE;
     712                 :     else
     713                 :     {
     714 UBC           0 :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     715               0 :         max = FPM_ITEMS_PER_INTERNAL_PAGE;
     716                 :     }
     717 CBC         498 :     if (btp->hdr.nused >= max / 3)
     718 UBC           0 :         return;
     719                 : 
     720                 :     /*
     721                 :      * If we can fit our right sibling's keys onto this page, consolidate.
     722                 :      */
     723 CBC         498 :     np = FreePageBtreeFindRightSibling(base, btp);
     724             498 :     if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
     725                 :     {
     726 UBC           0 :         if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     727                 :         {
     728               0 :             memcpy(&btp->u.leaf_key[btp->hdr.nused], &np->u.leaf_key[0],
     729               0 :                    sizeof(FreePageBtreeLeafKey) * np->hdr.nused);
     730               0 :             btp->hdr.nused += np->hdr.nused;
     731                 :         }
     732                 :         else
     733                 :         {
     734               0 :             memcpy(&btp->u.internal_key[btp->hdr.nused], &np->u.internal_key[0],
     735               0 :                    sizeof(FreePageBtreeInternalKey) * np->hdr.nused);
     736               0 :             btp->hdr.nused += np->hdr.nused;
     737               0 :             FreePageBtreeUpdateParentPointers(base, btp);
     738                 :         }
     739               0 :         FreePageBtreeRemovePage(fpm, np);
     740               0 :         return;
     741                 :     }
     742                 : 
     743                 :     /*
     744                 :      * If we can fit our keys onto our left sibling's page, consolidate. In
     745                 :      * this case, we move our keys onto the other page rather than vice versa,
     746                 :      * to avoid having to adjust ancestor keys.
     747                 :      */
     748 CBC         498 :     np = FreePageBtreeFindLeftSibling(base, btp);
     749             498 :     if (np != NULL && btp->hdr.nused + np->hdr.nused <= max)
     750                 :     {
     751 UBC           0 :         if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     752                 :         {
     753               0 :             memcpy(&np->u.leaf_key[np->hdr.nused], &btp->u.leaf_key[0],
     754               0 :                    sizeof(FreePageBtreeLeafKey) * btp->hdr.nused);
     755               0 :             np->hdr.nused += btp->hdr.nused;
     756                 :         }
     757                 :         else
     758                 :         {
     759               0 :             memcpy(&np->u.internal_key[np->hdr.nused], &btp->u.internal_key[0],
     760               0 :                    sizeof(FreePageBtreeInternalKey) * btp->hdr.nused);
     761               0 :             np->hdr.nused += btp->hdr.nused;
     762               0 :             FreePageBtreeUpdateParentPointers(base, np);
     763                 :         }
     764               0 :         FreePageBtreeRemovePage(fpm, btp);
     765               0 :         return;
     766                 :     }
     767                 : }
     768                 : 
     769                 : /*
     770                 :  * Find the passed page's left sibling; that is, the page at the same level
     771                 :  * of the tree whose keyspace immediately precedes ours.
     772                 :  */
     773                 : static FreePageBtree *
     774 CBC         498 : FreePageBtreeFindLeftSibling(char *base, FreePageBtree *btp)
     775                 : {
     776             498 :     FreePageBtree *p = btp;
     777             498 :     int         levels = 0;
     778                 : 
     779                 :     /* Move up until we can move left. */
     780                 :     for (;;)
     781 UBC           0 :     {
     782                 :         Size        first_page;
     783                 :         Size        index;
     784                 : 
     785 CBC         498 :         first_page = FreePageBtreeFirstKey(p);
     786             498 :         p = relptr_access(base, p->hdr.parent);
     787                 : 
     788             498 :         if (p == NULL)
     789             498 :             return NULL;        /* we were passed the rightmost page */
     790                 : 
     791 UBC           0 :         index = FreePageBtreeSearchInternal(p, first_page);
     792               0 :         if (index > 0)
     793                 :         {
     794               0 :             Assert(p->u.internal_key[index].first_page == first_page);
     795               0 :             p = relptr_access(base, p->u.internal_key[index - 1].child);
     796               0 :             break;
     797                 :         }
     798               0 :         Assert(index == 0);
     799               0 :         ++levels;
     800                 :     }
     801                 : 
     802                 :     /* Descend left. */
     803               0 :     while (levels > 0)
     804                 :     {
     805               0 :         Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     806               0 :         p = relptr_access(base, p->u.internal_key[p->hdr.nused - 1].child);
     807               0 :         --levels;
     808                 :     }
     809               0 :     Assert(p->hdr.magic == btp->hdr.magic);
     810                 : 
     811               0 :     return p;
     812                 : }
     813                 : 
     814                 : /*
     815                 :  * Find the passed page's right sibling; that is, the page at the same level
     816                 :  * of the tree whose keyspace immediately follows ours.
     817                 :  */
     818                 : static FreePageBtree *
     819 CBC         503 : FreePageBtreeFindRightSibling(char *base, FreePageBtree *btp)
     820                 : {
     821             503 :     FreePageBtree *p = btp;
     822             503 :     int         levels = 0;
     823                 : 
     824                 :     /* Move up until we can move right. */
     825                 :     for (;;)
     826 UBC           0 :     {
     827                 :         Size        first_page;
     828                 :         Size        index;
     829                 : 
     830 CBC         503 :         first_page = FreePageBtreeFirstKey(p);
     831             503 :         p = relptr_access(base, p->hdr.parent);
     832                 : 
     833             503 :         if (p == NULL)
     834             503 :             return NULL;        /* we were passed the rightmost page */
     835                 : 
     836 UBC           0 :         index = FreePageBtreeSearchInternal(p, first_page);
     837               0 :         if (index < p->hdr.nused - 1)
     838                 :         {
     839               0 :             Assert(p->u.internal_key[index].first_page == first_page);
     840               0 :             p = relptr_access(base, p->u.internal_key[index + 1].child);
     841               0 :             break;
     842                 :         }
     843               0 :         Assert(index == p->hdr.nused - 1);
     844               0 :         ++levels;
     845                 :     }
     846                 : 
     847                 :     /* Descend left. */
     848               0 :     while (levels > 0)
     849                 :     {
     850               0 :         Assert(p->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     851               0 :         p = relptr_access(base, p->u.internal_key[0].child);
     852               0 :         --levels;
     853                 :     }
     854               0 :     Assert(p->hdr.magic == btp->hdr.magic);
     855                 : 
     856               0 :     return p;
     857                 : }
     858                 : 
     859                 : /*
     860                 :  * Get the first key on a btree page.
     861                 :  */
     862                 : static Size
     863 CBC        1001 : FreePageBtreeFirstKey(FreePageBtree *btp)
     864                 : {
     865            1001 :     Assert(btp->hdr.nused > 0);
     866                 : 
     867            1001 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
     868            1001 :         return btp->u.leaf_key[0].first_page;
     869                 :     else
     870                 :     {
     871 UBC           0 :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     872               0 :         return btp->u.internal_key[0].first_page;
     873                 :     }
     874                 : }
     875                 : 
     876                 : /*
     877                 :  * Get a page from the btree recycle list for use as a btree page.
     878                 :  */
     879                 : static FreePageBtree *
     880 CBC         197 : FreePageBtreeGetRecycled(FreePageManager *fpm)
     881                 : {
     882             197 :     char       *base = fpm_segment_base(fpm);
     883             197 :     FreePageSpanLeader *victim = relptr_access(base, fpm->btree_recycle);
     884                 :     FreePageSpanLeader *newhead;
     885                 : 
     886             197 :     Assert(victim != NULL);
     887             197 :     newhead = relptr_access(base, victim->next);
     888             197 :     if (newhead != NULL)
     889 UBC           0 :         relptr_copy(newhead->prev, victim->prev);
     890 CBC         197 :     relptr_store(base, fpm->btree_recycle, newhead);
     891             197 :     Assert(fpm_pointer_is_page_aligned(base, victim));
     892             197 :     fpm->btree_recycle_count--;
     893             197 :     return (FreePageBtree *) victim;
     894                 : }
     895                 : 
     896                 : /*
     897                 :  * Insert an item into an internal page.
     898                 :  */
     899                 : static void
     900 UBC           0 : FreePageBtreeInsertInternal(char *base, FreePageBtree *btp, Size index,
     901                 :                             Size first_page, FreePageBtree *child)
     902                 : {
     903               0 :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
     904               0 :     Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
     905               0 :     Assert(index <= btp->hdr.nused);
     906               0 :     memmove(&btp->u.internal_key[index + 1], &btp->u.internal_key[index],
     907               0 :             sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
     908               0 :     btp->u.internal_key[index].first_page = first_page;
     909               0 :     relptr_store(base, btp->u.internal_key[index].child, child);
     910               0 :     ++btp->hdr.nused;
     911               0 : }
     912                 : 
     913                 : /*
     914                 :  * Insert an item into a leaf page.
     915                 :  */
     916                 : static void
     917 CBC         604 : FreePageBtreeInsertLeaf(FreePageBtree *btp, Size index, Size first_page,
     918                 :                         Size npages)
     919                 : {
     920             604 :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     921             604 :     Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
     922             604 :     Assert(index <= btp->hdr.nused);
     923             604 :     memmove(&btp->u.leaf_key[index + 1], &btp->u.leaf_key[index],
     924             604 :             sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
     925             604 :     btp->u.leaf_key[index].first_page = first_page;
     926             604 :     btp->u.leaf_key[index].npages = npages;
     927             604 :     ++btp->hdr.nused;
     928             604 : }
     929                 : 
     930                 : /*
     931                 :  * Put a page on the btree recycle list.
     932                 :  */
     933                 : static void
     934             197 : FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
     935                 : {
     936             197 :     char       *base = fpm_segment_base(fpm);
     937             197 :     FreePageSpanLeader *head = relptr_access(base, fpm->btree_recycle);
     938                 :     FreePageSpanLeader *span;
     939                 : 
     940             197 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
     941             197 :     span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
     942             197 :     span->npages = 1;
     943             197 :     relptr_store(base, span->next, head);
     944             197 :     relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
     945             197 :     if (head != NULL)
     946 UBC           0 :         relptr_store(base, head->prev, span);
     947 CBC         197 :     relptr_store(base, fpm->btree_recycle, span);
     948             197 :     fpm->btree_recycle_count++;
     949             197 : }
     950                 : 
     951                 : /*
     952                 :  * Remove an item from the btree at the given position on the given page.
     953                 :  */
     954                 : static void
     955             498 : FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
     956                 : {
     957             498 :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
     958             498 :     Assert(index < btp->hdr.nused);
     959                 : 
     960                 :     /* When last item is removed, extirpate entire page from btree. */
     961             498 :     if (btp->hdr.nused == 1)
     962                 :     {
     963 UBC           0 :         FreePageBtreeRemovePage(fpm, btp);
     964               0 :         return;
     965                 :     }
     966                 : 
     967                 :     /* Physically remove the key from the page. */
     968 CBC         498 :     --btp->hdr.nused;
     969             498 :     if (index < btp->hdr.nused)
     970             491 :         memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
     971             491 :                 sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
     972                 : 
     973                 :     /* If we just removed the first key, adjust ancestor keys. */
     974             498 :     if (index == 0)
     975             178 :         FreePageBtreeAdjustAncestorKeys(fpm, btp);
     976                 : 
     977                 :     /* Consider whether to consolidate this page with a sibling. */
     978             498 :     FreePageBtreeConsolidate(fpm, btp);
     979                 : }
     980                 : 
     981                 : /*
     982                 :  * Remove a page from the btree.  Caller is responsible for having relocated
     983                 :  * any keys from this page that are still wanted.  The page is placed on the
     984                 :  * recycled list.
     985                 :  */
     986                 : static void
     987 UBC           0 : FreePageBtreeRemovePage(FreePageManager *fpm, FreePageBtree *btp)
     988                 : {
     989               0 :     char       *base = fpm_segment_base(fpm);
     990                 :     FreePageBtree *parent;
     991                 :     Size        index;
     992                 :     Size        first_page;
     993                 : 
     994                 :     for (;;)
     995                 :     {
     996                 :         /* Find parent page. */
     997               0 :         parent = relptr_access(base, btp->hdr.parent);
     998               0 :         if (parent == NULL)
     999                 :         {
    1000                 :             /* We are removing the root page. */
    1001               0 :             relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
    1002               0 :             fpm->btree_depth = 0;
    1003               0 :             Assert(fpm->singleton_first_page == 0);
    1004               0 :             Assert(fpm->singleton_npages == 0);
    1005               0 :             return;
    1006                 :         }
    1007                 : 
    1008                 :         /*
    1009                 :          * If the parent contains only one item, we need to remove it as well.
    1010                 :          */
    1011               0 :         if (parent->hdr.nused > 1)
    1012               0 :             break;
    1013               0 :         FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
    1014               0 :         btp = parent;
    1015                 :     }
    1016                 : 
    1017                 :     /* Find and remove the downlink. */
    1018               0 :     first_page = FreePageBtreeFirstKey(btp);
    1019               0 :     if (parent->hdr.magic == FREE_PAGE_LEAF_MAGIC)
    1020                 :     {
    1021               0 :         index = FreePageBtreeSearchLeaf(parent, first_page);
    1022               0 :         Assert(index < parent->hdr.nused);
    1023               0 :         if (index < parent->hdr.nused - 1)
    1024               0 :             memmove(&parent->u.leaf_key[index],
    1025               0 :                     &parent->u.leaf_key[index + 1],
    1026                 :                     sizeof(FreePageBtreeLeafKey)
    1027               0 :                     * (parent->hdr.nused - index - 1));
    1028                 :     }
    1029                 :     else
    1030                 :     {
    1031               0 :         index = FreePageBtreeSearchInternal(parent, first_page);
    1032               0 :         Assert(index < parent->hdr.nused);
    1033               0 :         if (index < parent->hdr.nused - 1)
    1034               0 :             memmove(&parent->u.internal_key[index],
    1035               0 :                     &parent->u.internal_key[index + 1],
    1036                 :                     sizeof(FreePageBtreeInternalKey)
    1037               0 :                     * (parent->hdr.nused - index - 1));
    1038                 :     }
    1039               0 :     parent->hdr.nused--;
    1040               0 :     Assert(parent->hdr.nused > 0);
    1041                 : 
    1042                 :     /* Recycle the page. */
    1043               0 :     FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, btp));
    1044                 : 
    1045                 :     /* Adjust ancestor keys if needed. */
    1046               0 :     if (index == 0)
    1047               0 :         FreePageBtreeAdjustAncestorKeys(fpm, parent);
    1048                 : 
    1049                 :     /* Consider whether to consolidate the parent with a sibling. */
    1050               0 :     FreePageBtreeConsolidate(fpm, parent);
    1051                 : }
    1052                 : 
    1053                 : /*
    1054                 :  * Search the btree for an entry for the given first page and initialize
    1055                 :  * *result with the results of the search.  result->page and result->index
    1056                 :  * indicate either the position of an exact match or the position at which
    1057                 :  * the new key should be inserted.  result->found is true for an exact match,
    1058                 :  * otherwise false.  result->split_pages will contain the number of additional
    1059                 :  * btree pages that will be needed when performing a split to insert a key.
    1060                 :  * Except as described above, the contents of fields in the result object are
    1061                 :  * undefined on return.
    1062                 :  */
    1063                 : static void
    1064 CBC        2851 : FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
    1065                 :                     FreePageBtreeSearchResult *result)
    1066                 : {
    1067            2851 :     char       *base = fpm_segment_base(fpm);
    1068            2851 :     FreePageBtree *btp = relptr_access(base, fpm->btree_root);
    1069                 :     Size        index;
    1070                 : 
    1071            2851 :     result->split_pages = 1;
    1072                 : 
    1073                 :     /* If the btree is empty, there's nothing to find. */
    1074            2851 :     if (btp == NULL)
    1075                 :     {
    1076 UBC           0 :         result->page = NULL;
    1077               0 :         result->found = false;
    1078               0 :         return;
    1079                 :     }
    1080                 : 
    1081                 :     /* Descend until we hit a leaf. */
    1082 CBC        2851 :     while (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1083                 :     {
    1084                 :         FreePageBtree *child;
    1085                 :         bool        found_exact;
    1086                 : 
    1087 UBC           0 :         index = FreePageBtreeSearchInternal(btp, first_page);
    1088               0 :         found_exact = index < btp->hdr.nused &&
    1089               0 :             btp->u.internal_key[index].first_page == first_page;
    1090                 : 
    1091                 :         /*
    1092                 :          * If we found an exact match we descend directly.  Otherwise, we
    1093                 :          * descend into the child to the left if possible so that we can find
    1094                 :          * the insertion point at that child's high end.
    1095                 :          */
    1096               0 :         if (!found_exact && index > 0)
    1097               0 :             --index;
    1098                 : 
    1099                 :         /* Track required split depth for leaf insert. */
    1100               0 :         if (btp->hdr.nused >= FPM_ITEMS_PER_INTERNAL_PAGE)
    1101                 :         {
    1102               0 :             Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
    1103               0 :             result->split_pages++;
    1104                 :         }
    1105                 :         else
    1106               0 :             result->split_pages = 0;
    1107                 : 
    1108                 :         /* Descend to appropriate child page. */
    1109               0 :         Assert(index < btp->hdr.nused);
    1110               0 :         child = relptr_access(base, btp->u.internal_key[index].child);
    1111               0 :         Assert(relptr_access(base, child->hdr.parent) == btp);
    1112               0 :         btp = child;
    1113                 :     }
    1114                 : 
    1115                 :     /* Track required split depth for leaf insert. */
    1116 CBC        2851 :     if (btp->hdr.nused >= FPM_ITEMS_PER_LEAF_PAGE)
    1117                 :     {
    1118 UBC           0 :         Assert(btp->hdr.nused == FPM_ITEMS_PER_INTERNAL_PAGE);
    1119               0 :         result->split_pages++;
    1120                 :     }
    1121                 :     else
    1122 CBC        2851 :         result->split_pages = 0;
    1123                 : 
    1124                 :     /* Search leaf page. */
    1125            2851 :     index = FreePageBtreeSearchLeaf(btp, first_page);
    1126                 : 
    1127                 :     /* Assemble results. */
    1128            2851 :     result->page = btp;
    1129            2851 :     result->index = index;
    1130            5697 :     result->found = index < btp->hdr.nused &&
    1131            2846 :         first_page == btp->u.leaf_key[index].first_page;
    1132                 : }
    1133                 : 
    1134                 : /*
    1135                 :  * Search an internal page for the first key greater than or equal to a given
    1136                 :  * page number.  Returns the index of that key, or one greater than the number
    1137                 :  * of keys on the page if none.
    1138                 :  */
    1139                 : static Size
    1140 UBC           0 : FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page)
    1141                 : {
    1142               0 :     Size        low = 0;
    1143               0 :     Size        high = btp->hdr.nused;
    1144                 : 
    1145               0 :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1146               0 :     Assert(high > 0 && high <= FPM_ITEMS_PER_INTERNAL_PAGE);
    1147                 : 
    1148               0 :     while (low < high)
    1149                 :     {
    1150               0 :         Size        mid = (low + high) / 2;
    1151               0 :         Size        val = btp->u.internal_key[mid].first_page;
    1152                 : 
    1153               0 :         if (first_page == val)
    1154               0 :             return mid;
    1155               0 :         else if (first_page < val)
    1156               0 :             high = mid;
    1157                 :         else
    1158               0 :             low = mid + 1;
    1159                 :     }
    1160                 : 
    1161               0 :     return low;
    1162                 : }
    1163                 : 
    1164                 : /*
    1165                 :  * Search a leaf page for the first key greater than or equal to a given
    1166                 :  * page number.  Returns the index of that key, or one greater than the number
    1167                 :  * of keys on the page if none.
    1168                 :  */
    1169                 : static Size
    1170 CBC        2851 : FreePageBtreeSearchLeaf(FreePageBtree *btp, Size first_page)
    1171                 : {
    1172            2851 :     Size        low = 0;
    1173            2851 :     Size        high = btp->hdr.nused;
    1174                 : 
    1175            2851 :     Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
    1176            2851 :     Assert(high > 0 && high <= FPM_ITEMS_PER_LEAF_PAGE);
    1177                 : 
    1178            6954 :     while (low < high)
    1179                 :     {
    1180            5261 :         Size        mid = (low + high) / 2;
    1181            5261 :         Size        val = btp->u.leaf_key[mid].first_page;
    1182                 : 
    1183            5261 :         if (first_page == val)
    1184            1158 :             return mid;
    1185            4103 :         else if (first_page < val)
    1186            3008 :             high = mid;
    1187                 :         else
    1188            1095 :             low = mid + 1;
    1189                 :     }
    1190                 : 
    1191            1693 :     return low;
    1192                 : }
    1193                 : 
    1194                 : /*
    1195                 :  * Allocate a new btree page and move half the keys from the provided page
    1196                 :  * to the new page.  Caller is responsible for making sure that there's a
    1197                 :  * page available from fpm->btree_recycle.  Returns a pointer to the new page,
    1198                 :  * to which caller must add a downlink.
    1199                 :  */
    1200                 : static FreePageBtree *
    1201 UBC           0 : FreePageBtreeSplitPage(FreePageManager *fpm, FreePageBtree *btp)
    1202                 : {
    1203                 :     FreePageBtree *newsibling;
    1204                 : 
    1205               0 :     newsibling = FreePageBtreeGetRecycled(fpm);
    1206               0 :     newsibling->hdr.magic = btp->hdr.magic;
    1207               0 :     newsibling->hdr.nused = btp->hdr.nused / 2;
    1208               0 :     relptr_copy(newsibling->hdr.parent, btp->hdr.parent);
    1209               0 :     btp->hdr.nused -= newsibling->hdr.nused;
    1210                 : 
    1211               0 :     if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
    1212               0 :         memcpy(&newsibling->u.leaf_key,
    1213               0 :                &btp->u.leaf_key[btp->hdr.nused],
    1214               0 :                sizeof(FreePageBtreeLeafKey) * newsibling->hdr.nused);
    1215                 :     else
    1216                 :     {
    1217               0 :         Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1218               0 :         memcpy(&newsibling->u.internal_key,
    1219               0 :                &btp->u.internal_key[btp->hdr.nused],
    1220               0 :                sizeof(FreePageBtreeInternalKey) * newsibling->hdr.nused);
    1221               0 :         FreePageBtreeUpdateParentPointers(fpm_segment_base(fpm), newsibling);
    1222                 :     }
    1223                 : 
    1224               0 :     return newsibling;
    1225                 : }
    1226                 : 
    1227                 : /*
    1228                 :  * When internal pages are split or merged, the parent pointers of their
    1229                 :  * children must be updated.
    1230                 :  */
    1231                 : static void
    1232               0 : FreePageBtreeUpdateParentPointers(char *base, FreePageBtree *btp)
    1233                 : {
    1234                 :     Size        i;
    1235                 : 
    1236               0 :     Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
    1237               0 :     for (i = 0; i < btp->hdr.nused; ++i)
    1238                 :     {
    1239                 :         FreePageBtree *child;
    1240                 : 
    1241               0 :         child = relptr_access(base, btp->u.internal_key[i].child);
    1242               0 :         relptr_store(base, child->hdr.parent, btp);
    1243                 :     }
    1244               0 : }
    1245                 : 
    1246                 : /*
    1247                 :  * Debugging dump of btree data.
    1248                 :  */
    1249                 : static void
    1250               0 : FreePageManagerDumpBtree(FreePageManager *fpm, FreePageBtree *btp,
    1251                 :                          FreePageBtree *parent, int level, StringInfo buf)
    1252                 : {
    1253               0 :     char       *base = fpm_segment_base(fpm);
    1254               0 :     Size        pageno = fpm_pointer_to_page(base, btp);
    1255                 :     Size        index;
    1256                 :     FreePageBtree *check_parent;
    1257                 : 
    1258               0 :     check_stack_depth();
    1259               0 :     check_parent = relptr_access(base, btp->hdr.parent);
    1260               0 :     appendStringInfo(buf, "  %zu@%d %c", pageno, level,
    1261               0 :                      btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC ? 'i' : 'l');
    1262               0 :     if (parent != check_parent)
    1263               0 :         appendStringInfo(buf, " [actual parent %zu, expected %zu]",
    1264               0 :                          fpm_pointer_to_page(base, check_parent),
    1265               0 :                          fpm_pointer_to_page(base, parent));
    1266               0 :     appendStringInfoChar(buf, ':');
    1267               0 :     for (index = 0; index < btp->hdr.nused; ++index)
    1268                 :     {
    1269               0 :         if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1270               0 :             appendStringInfo(buf, " %zu->%zu",
    1271                 :                              btp->u.internal_key[index].first_page,
    1272               0 :                              relptr_offset(btp->u.internal_key[index].child) / FPM_PAGE_SIZE);
    1273                 :         else
    1274               0 :             appendStringInfo(buf, " %zu(%zu)",
    1275                 :                              btp->u.leaf_key[index].first_page,
    1276                 :                              btp->u.leaf_key[index].npages);
    1277                 :     }
    1278               0 :     appendStringInfoChar(buf, '\n');
    1279                 : 
    1280               0 :     if (btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC)
    1281                 :     {
    1282               0 :         for (index = 0; index < btp->hdr.nused; ++index)
    1283                 :         {
    1284                 :             FreePageBtree *child;
    1285                 : 
    1286               0 :             child = relptr_access(base, btp->u.internal_key[index].child);
    1287               0 :             FreePageManagerDumpBtree(fpm, child, btp, level + 1, buf);
    1288                 :         }
    1289                 :     }
    1290               0 : }
    1291                 : 
    1292                 : /*
    1293                 :  * Debugging dump of free-span data.
    1294                 :  */
    1295                 : static void
    1296               0 : FreePageManagerDumpSpans(FreePageManager *fpm, FreePageSpanLeader *span,
    1297                 :                          Size expected_pages, StringInfo buf)
    1298                 : {
    1299               0 :     char       *base = fpm_segment_base(fpm);
    1300                 : 
    1301               0 :     while (span != NULL)
    1302                 :     {
    1303               0 :         if (span->npages != expected_pages)
    1304               0 :             appendStringInfo(buf, " %zu(%zu)", fpm_pointer_to_page(base, span),
    1305                 :                              span->npages);
    1306                 :         else
    1307               0 :             appendStringInfo(buf, " %zu", fpm_pointer_to_page(base, span));
    1308               0 :         span = relptr_access(base, span->next);
    1309                 :     }
    1310                 : 
    1311               0 :     appendStringInfoChar(buf, '\n');
    1312               0 : }
    1313                 : 
    1314                 : /*
    1315                 :  * This function allocates a run of pages of the given length from the free
    1316                 :  * page manager.
    1317                 :  */
    1318                 : static bool
    1319 CBC       16078 : FreePageManagerGetInternal(FreePageManager *fpm, Size npages, Size *first_page)
    1320                 : {
    1321           16078 :     char       *base = fpm_segment_base(fpm);
    1322           16078 :     FreePageSpanLeader *victim = NULL;
    1323                 :     FreePageSpanLeader *prev;
    1324                 :     FreePageSpanLeader *next;
    1325                 :     FreePageBtreeSearchResult result;
    1326           16078 :     Size        victim_page = 0;    /* placate compiler */
    1327                 :     Size        f;
    1328                 : 
    1329                 :     /*
    1330                 :      * Search for a free span.
    1331                 :      *
    1332                 :      * Right now, we use a simple best-fit policy here, but it's possible for
    1333                 :      * this to result in memory fragmentation if we're repeatedly asked to
    1334                 :      * allocate chunks just a little smaller than what we have available.
    1335                 :      * Hopefully, this is unlikely, because we expect most requests to be
    1336                 :      * single pages or superblock-sized chunks -- but no policy can be optimal
    1337                 :      * under all circumstances unless it has knowledge of future allocation
    1338                 :      * patterns.
    1339                 :      */
    1340         1146931 :     for (f = Min(npages, FPM_NUM_FREELISTS) - 1; f < FPM_NUM_FREELISTS; ++f)
    1341                 :     {
    1342                 :         /* Skip empty freelists. */
    1343         1146931 :         if (relptr_is_null(fpm->freelist[f]))
    1344         1130853 :             continue;
    1345                 : 
    1346                 :         /*
    1347                 :          * All of the freelists except the last one contain only items of a
    1348                 :          * single size, so we just take the first one.  But the final free
    1349                 :          * list contains everything too big for any of the other lists, so we
    1350                 :          * need to search the list.
    1351                 :          */
    1352           16078 :         if (f < FPM_NUM_FREELISTS - 1)
    1353            8889 :             victim = relptr_access(base, fpm->freelist[f]);
    1354                 :         else
    1355                 :         {
    1356                 :             FreePageSpanLeader *candidate;
    1357                 : 
    1358            7189 :             candidate = relptr_access(base, fpm->freelist[f]);
    1359                 :             do
    1360                 :             {
    1361            7189 :                 if (candidate->npages >= npages && (victim == NULL ||
    1362 UBC           0 :                                                     victim->npages > candidate->npages))
    1363                 :                 {
    1364 CBC        7189 :                     victim = candidate;
    1365            7189 :                     if (victim->npages == npages)
    1366 UBC           0 :                         break;
    1367                 :                 }
    1368 CBC        7189 :                 candidate = relptr_access(base, candidate->next);
    1369            7189 :             } while (candidate != NULL);
    1370                 :         }
    1371           16078 :         break;
    1372                 :     }
    1373                 : 
    1374                 :     /* If we didn't find an allocatable span, return failure. */
    1375           16078 :     if (victim == NULL)
    1376 UBC           0 :         return false;
    1377                 : 
    1378                 :     /* Remove span from free list. */
    1379 CBC       16078 :     Assert(victim->magic == FREE_PAGE_SPAN_LEADER_MAGIC);
    1380           16078 :     prev = relptr_access(base, victim->prev);
    1381           16078 :     next = relptr_access(base, victim->next);
    1382           16078 :     if (prev != NULL)
    1383 UBC           0 :         relptr_copy(prev->next, victim->next);
    1384                 :     else
    1385 CBC       16078 :         relptr_copy(fpm->freelist[f], victim->next);
    1386           16078 :     if (next != NULL)
    1387             107 :         relptr_copy(next->prev, victim->prev);
    1388           16078 :     victim_page = fpm_pointer_to_page(base, victim);
    1389                 : 
    1390                 :     /* Decide whether we might be invalidating contiguous_pages. */
    1391           16078 :     if (f == FPM_NUM_FREELISTS - 1 &&
    1392            7189 :         victim->npages == fpm->contiguous_pages)
    1393                 :     {
    1394                 :         /*
    1395                 :          * The victim span came from the oversized freelist, and had the same
    1396                 :          * size as the longest span.  There may or may not be another one of
    1397                 :          * the same size, so contiguous_pages must be recomputed just to be
    1398                 :          * safe.
    1399                 :          */
    1400            7189 :         fpm->contiguous_pages_dirty = true;
    1401                 :     }
    1402            8889 :     else if (f + 1 == fpm->contiguous_pages &&
    1403            7996 :              relptr_is_null(fpm->freelist[f]))
    1404                 :     {
    1405                 :         /*
    1406                 :          * The victim span came from a fixed sized freelist, and it was the
    1407                 :          * list for spans of the same size as the current longest span, and
    1408                 :          * the list is now empty after removing the victim.  So
    1409                 :          * contiguous_pages must be recomputed without a doubt.
    1410                 :          */
    1411            7996 :         fpm->contiguous_pages_dirty = true;
    1412                 :     }
    1413                 : 
    1414                 :     /*
    1415                 :      * If we haven't initialized the btree yet, the victim must be the single
    1416                 :      * span stored within the FreePageManager itself.  Otherwise, we need to
    1417                 :      * update the btree.
    1418                 :      */
    1419           16078 :     if (relptr_is_null(fpm->btree_root))
    1420                 :     {
    1421           14920 :         Assert(victim_page == fpm->singleton_first_page);
    1422           14920 :         Assert(victim->npages == fpm->singleton_npages);
    1423           14920 :         Assert(victim->npages >= npages);
    1424           14920 :         fpm->singleton_first_page += npages;
    1425           14920 :         fpm->singleton_npages -= npages;
    1426           14920 :         if (fpm->singleton_npages > 0)
    1427           14905 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1428                 :                                    fpm->singleton_npages);
    1429                 :     }
    1430                 :     else
    1431                 :     {
    1432                 :         /*
    1433                 :          * If the span we found is exactly the right size, remove it from the
    1434                 :          * btree completely.  Otherwise, adjust the btree entry to reflect the
    1435                 :          * still-unallocated portion of the span, and put that portion on the
    1436                 :          * appropriate free list.
    1437                 :          */
    1438            1158 :         FreePageBtreeSearch(fpm, victim_page, &result);
    1439            1158 :         Assert(result.found);
    1440            1158 :         if (victim->npages == npages)
    1441             365 :             FreePageBtreeRemove(fpm, result.page, result.index);
    1442                 :         else
    1443                 :         {
    1444                 :             FreePageBtreeLeafKey *key;
    1445                 : 
    1446                 :             /* Adjust btree to reflect remaining pages. */
    1447             793 :             Assert(victim->npages > npages);
    1448             793 :             key = &result.page->u.leaf_key[result.index];
    1449             793 :             Assert(key->npages == victim->npages);
    1450             793 :             key->first_page += npages;
    1451             793 :             key->npages -= npages;
    1452             793 :             if (result.index == 0)
    1453             360 :                 FreePageBtreeAdjustAncestorKeys(fpm, result.page);
    1454                 : 
    1455                 :             /* Put the unallocated pages back on the appropriate free list. */
    1456             793 :             FreePagePushSpanLeader(fpm, victim_page + npages,
    1457             793 :                                    victim->npages - npages);
    1458                 :         }
    1459                 :     }
    1460                 : 
    1461                 :     /* Return results to caller. */
    1462           16078 :     *first_page = fpm_pointer_to_page(base, victim);
    1463           16078 :     return true;
    1464                 : }
    1465                 : 
    1466                 : /*
    1467                 :  * Put a range of pages into the btree and freelists, consolidating it with
    1468                 :  * existing free spans just before and/or after it.  If 'soft' is true,
    1469                 :  * only perform the insertion if it can be done without allocating new btree
    1470                 :  * pages; if false, do it always.  Returns 0 if the soft flag caused the
    1471                 :  * insertion to be skipped, or otherwise the size of the contiguous span
    1472                 :  * created by the insertion.  This may be larger than npages if we're able
    1473                 :  * to consolidate with an adjacent range.
    1474                 :  */
    1475                 : static Size
    1476            5600 : FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
    1477                 :                            bool soft)
    1478                 : {
    1479            5600 :     char       *base = fpm_segment_base(fpm);
    1480                 :     FreePageBtreeSearchResult result;
    1481            5600 :     FreePageBtreeLeafKey *prevkey = NULL;
    1482            5600 :     FreePageBtreeLeafKey *nextkey = NULL;
    1483                 :     FreePageBtree *np;
    1484                 :     Size        nindex;
    1485                 : 
    1486            5600 :     Assert(npages > 0);
    1487                 : 
    1488                 :     /* We can store a single free span without initializing the btree. */
    1489            5600 :     if (fpm->btree_depth == 0)
    1490                 :     {
    1491            4094 :         if (fpm->singleton_npages == 0)
    1492                 :         {
    1493                 :             /* Don't have a span yet; store this one. */
    1494            3234 :             fpm->singleton_first_page = first_page;
    1495            3234 :             fpm->singleton_npages = npages;
    1496            3234 :             FreePagePushSpanLeader(fpm, first_page, npages);
    1497            3234 :             return fpm->singleton_npages;
    1498                 :         }
    1499             860 :         else if (fpm->singleton_first_page + fpm->singleton_npages ==
    1500                 :                  first_page)
    1501                 :         {
    1502                 :             /* New span immediately follows sole existing span. */
    1503               5 :             fpm->singleton_npages += npages;
    1504               5 :             FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
    1505               5 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1506                 :                                    fpm->singleton_npages);
    1507               5 :             return fpm->singleton_npages;
    1508                 :         }
    1509             855 :         else if (first_page + npages == fpm->singleton_first_page)
    1510                 :         {
    1511                 :             /* New span immediately precedes sole existing span. */
    1512             560 :             FreePagePopSpanLeader(fpm, fpm->singleton_first_page);
    1513             560 :             fpm->singleton_first_page = first_page;
    1514             560 :             fpm->singleton_npages += npages;
    1515             560 :             FreePagePushSpanLeader(fpm, fpm->singleton_first_page,
    1516                 :                                    fpm->singleton_npages);
    1517             560 :             return fpm->singleton_npages;
    1518                 :         }
    1519                 :         else
    1520                 :         {
    1521                 :             /* Not contiguous; we need to initialize the btree. */
    1522                 :             Size        root_page;
    1523                 :             FreePageBtree *root;
    1524                 : 
    1525             295 :             if (!relptr_is_null(fpm->btree_recycle))
    1526              62 :                 root = FreePageBtreeGetRecycled(fpm);
    1527             233 :             else if (soft)
    1528             108 :                 return 0;       /* Should not allocate if soft. */
    1529             140 :             else if (FreePageManagerGetInternal(fpm, 1, &root_page))
    1530             140 :                 root = (FreePageBtree *) fpm_page_to_pointer(base, root_page);
    1531                 :             else
    1532                 :             {
    1533                 :                 /* We'd better be able to get a page from the existing range. */
    1534 UBC           0 :                 elog(FATAL, "free page manager btree is corrupt");
    1535                 :             }
    1536                 : 
    1537                 :             /* Create the btree and move the preexisting range into it. */
    1538 CBC         202 :             root->hdr.magic = FREE_PAGE_LEAF_MAGIC;
    1539             202 :             root->hdr.nused = 1;
    1540             202 :             relptr_store(base, root->hdr.parent, (FreePageBtree *) NULL);
    1541             202 :             root->u.leaf_key[0].first_page = fpm->singleton_first_page;
    1542             202 :             root->u.leaf_key[0].npages = fpm->singleton_npages;
    1543             202 :             relptr_store(base, fpm->btree_root, root);
    1544             202 :             fpm->singleton_first_page = 0;
    1545             202 :             fpm->singleton_npages = 0;
    1546             202 :             fpm->btree_depth = 1;
    1547                 : 
    1548                 :             /*
    1549                 :              * Corner case: it may be that the btree root took the very last
    1550                 :              * free page.  In that case, the sole btree entry covers a zero
    1551                 :              * page run, which is invalid.  Overwrite it with the entry we're
    1552                 :              * trying to insert and get out.
    1553                 :              */
    1554             202 :             if (root->u.leaf_key[0].npages == 0)
    1555                 :             {
    1556              15 :                 root->u.leaf_key[0].first_page = first_page;
    1557              15 :                 root->u.leaf_key[0].npages = npages;
    1558              15 :                 FreePagePushSpanLeader(fpm, first_page, npages);
    1559              15 :                 return npages;
    1560                 :             }
    1561                 : 
    1562                 :             /* Fall through to insert the new key. */
    1563                 :         }
    1564                 :     }
    1565                 : 
    1566                 :     /* Search the btree. */
    1567            1693 :     FreePageBtreeSearch(fpm, first_page, &result);
    1568            1693 :     Assert(!result.found);
    1569            1693 :     if (result.index > 0)
    1570            1060 :         prevkey = &result.page->u.leaf_key[result.index - 1];
    1571            1693 :     if (result.index < result.page->hdr.nused)
    1572                 :     {
    1573            1688 :         np = result.page;
    1574            1688 :         nindex = result.index;
    1575            1688 :         nextkey = &result.page->u.leaf_key[result.index];
    1576                 :     }
    1577                 :     else
    1578                 :     {
    1579               5 :         np = FreePageBtreeFindRightSibling(base, result.page);
    1580               5 :         nindex = 0;
    1581               5 :         if (np != NULL)
    1582 UBC           0 :             nextkey = &np->u.leaf_key[0];
    1583                 :     }
    1584                 : 
    1585                 :     /* Consolidate with the previous entry if possible. */
    1586 CBC        1693 :     if (prevkey != NULL && prevkey->first_page + prevkey->npages >= first_page)
    1587                 :     {
    1588             259 :         bool        remove_next = false;
    1589                 :         Size        result;
    1590                 : 
    1591             259 :         Assert(prevkey->first_page + prevkey->npages == first_page);
    1592             259 :         prevkey->npages = (first_page - prevkey->first_page) + npages;
    1593                 : 
    1594                 :         /* Check whether we can *also* consolidate with the following entry. */
    1595             259 :         if (nextkey != NULL &&
    1596             254 :             prevkey->first_page + prevkey->npages >= nextkey->first_page)
    1597                 :         {
    1598             133 :             Assert(prevkey->first_page + prevkey->npages ==
    1599                 :                    nextkey->first_page);
    1600             133 :             prevkey->npages = (nextkey->first_page - prevkey->first_page)
    1601             133 :                 + nextkey->npages;
    1602             133 :             FreePagePopSpanLeader(fpm, nextkey->first_page);
    1603             133 :             remove_next = true;
    1604                 :         }
    1605                 : 
    1606                 :         /* Put the span on the correct freelist and save size. */
    1607             259 :         FreePagePopSpanLeader(fpm, prevkey->first_page);
    1608             259 :         FreePagePushSpanLeader(fpm, prevkey->first_page, prevkey->npages);
    1609             259 :         result = prevkey->npages;
    1610                 : 
    1611                 :         /*
    1612                 :          * If we consolidated with both the preceding and following entries,
    1613                 :          * we must remove the following entry.  We do this last, because
    1614                 :          * removing an element from the btree may invalidate pointers we hold
    1615                 :          * into the current data structure.
    1616                 :          *
    1617                 :          * NB: The btree is technically in an invalid state a this point
    1618                 :          * because we've already updated prevkey to cover the same key space
    1619                 :          * as nextkey.  FreePageBtreeRemove() shouldn't notice that, though.
    1620                 :          */
    1621             259 :         if (remove_next)
    1622             133 :             FreePageBtreeRemove(fpm, np, nindex);
    1623                 : 
    1624             259 :         return result;
    1625                 :     }
    1626                 : 
    1627                 :     /* Consolidate with the next entry if possible. */
    1628            1434 :     if (nextkey != NULL && first_page + npages >= nextkey->first_page)
    1629                 :     {
    1630                 :         Size        newpages;
    1631                 : 
    1632                 :         /* Compute new size for span. */
    1633             830 :         Assert(first_page + npages == nextkey->first_page);
    1634             830 :         newpages = (nextkey->first_page - first_page) + nextkey->npages;
    1635                 : 
    1636                 :         /* Put span on correct free list. */
    1637             830 :         FreePagePopSpanLeader(fpm, nextkey->first_page);
    1638             830 :         FreePagePushSpanLeader(fpm, first_page, newpages);
    1639                 : 
    1640                 :         /* Update key in place. */
    1641             830 :         nextkey->first_page = first_page;
    1642             830 :         nextkey->npages = newpages;
    1643                 : 
    1644                 :         /* If reducing first key on page, ancestors might need adjustment. */
    1645             830 :         if (nindex == 0)
    1646             304 :             FreePageBtreeAdjustAncestorKeys(fpm, np);
    1647                 : 
    1648             830 :         return nextkey->npages;
    1649                 :     }
    1650                 : 
    1651                 :     /* Split leaf page and as many of its ancestors as necessary. */
    1652             604 :     if (result.split_pages > 0)
    1653                 :     {
    1654                 :         /*
    1655                 :          * NB: We could consider various coping strategies here to avoid a
    1656                 :          * split; most obviously, if np != result.page, we could target that
    1657                 :          * page instead.   More complicated shuffling strategies could be
    1658                 :          * possible as well; basically, unless every single leaf page is 100%
    1659                 :          * full, we can jam this key in there if we try hard enough.  It's
    1660                 :          * unlikely that trying that hard is worthwhile, but it's possible we
    1661                 :          * might need to make more than no effort.  For now, we just do the
    1662                 :          * easy thing, which is nothing.
    1663                 :          */
    1664                 : 
    1665                 :         /* If this is a soft insert, it's time to give up. */
    1666 UBC           0 :         if (soft)
    1667               0 :             return 0;
    1668                 : 
    1669                 :         /* Check whether we need to allocate more btree pages to split. */
    1670               0 :         if (result.split_pages > fpm->btree_recycle_count)
    1671                 :         {
    1672                 :             Size        pages_needed;
    1673                 :             Size        recycle_page;
    1674                 :             Size        i;
    1675                 : 
    1676                 :             /*
    1677                 :              * Allocate the required number of pages and split each one in
    1678                 :              * turn.  This should never fail, because if we've got enough
    1679                 :              * spans of free pages kicking around that we need additional
    1680                 :              * storage space just to remember them all, then we should
    1681                 :              * certainly have enough to expand the btree, which should only
    1682                 :              * ever use a tiny number of pages compared to the number under
    1683                 :              * management.  If it does, something's badly screwed up.
    1684                 :              */
    1685               0 :             pages_needed = result.split_pages - fpm->btree_recycle_count;
    1686               0 :             for (i = 0; i < pages_needed; ++i)
    1687                 :             {
    1688               0 :                 if (!FreePageManagerGetInternal(fpm, 1, &recycle_page))
    1689               0 :                     elog(FATAL, "free page manager btree is corrupt");
    1690               0 :                 FreePageBtreeRecycle(fpm, recycle_page);
    1691                 :             }
    1692                 : 
    1693                 :             /*
    1694                 :              * The act of allocating pages to recycle may have invalidated the
    1695                 :              * results of our previous btree research, so repeat it. (We could
    1696                 :              * recheck whether any of our split-avoidance strategies that were
    1697                 :              * not viable before now are, but it hardly seems worthwhile, so
    1698                 :              * we don't bother. Consolidation can't be possible now if it
    1699                 :              * wasn't previously.)
    1700                 :              */
    1701               0 :             FreePageBtreeSearch(fpm, first_page, &result);
    1702                 : 
    1703                 :             /*
    1704                 :              * The act of allocating pages for use in constructing our btree
    1705                 :              * should never cause any page to become more full, so the new
    1706                 :              * split depth should be no greater than the old one, and perhaps
    1707                 :              * less if we fortuitously allocated a chunk that freed up a slot
    1708                 :              * on the page we need to update.
    1709                 :              */
    1710               0 :             Assert(result.split_pages <= fpm->btree_recycle_count);
    1711                 :         }
    1712                 : 
    1713                 :         /* If we still need to perform a split, do it. */
    1714               0 :         if (result.split_pages > 0)
    1715                 :         {
    1716               0 :             FreePageBtree *split_target = result.page;
    1717               0 :             FreePageBtree *child = NULL;
    1718               0 :             Size        key = first_page;
    1719                 : 
    1720                 :             for (;;)
    1721               0 :             {
    1722                 :                 FreePageBtree *newsibling;
    1723                 :                 FreePageBtree *parent;
    1724                 : 
    1725                 :                 /* Identify parent page, which must receive downlink. */
    1726               0 :                 parent = relptr_access(base, split_target->hdr.parent);
    1727                 : 
    1728                 :                 /* Split the page - downlink not added yet. */
    1729               0 :                 newsibling = FreePageBtreeSplitPage(fpm, split_target);
    1730                 : 
    1731                 :                 /*
    1732                 :                  * At this point in the loop, we're always carrying a pending
    1733                 :                  * insertion.  On the first pass, it's the actual key we're
    1734                 :                  * trying to insert; on subsequent passes, it's the downlink
    1735                 :                  * that needs to be added as a result of the split performed
    1736                 :                  * during the previous loop iteration.  Since we've just split
    1737                 :                  * the page, there's definitely room on one of the two
    1738                 :                  * resulting pages.
    1739                 :                  */
    1740               0 :                 if (child == NULL)
    1741                 :                 {
    1742                 :                     Size        index;
    1743                 :                     FreePageBtree *insert_into;
    1744                 : 
    1745               0 :                     insert_into = key < newsibling->u.leaf_key[0].first_page ?
    1746               0 :                         split_target : newsibling;
    1747               0 :                     index = FreePageBtreeSearchLeaf(insert_into, key);
    1748               0 :                     FreePageBtreeInsertLeaf(insert_into, index, key, npages);
    1749               0 :                     if (index == 0 && insert_into == split_target)
    1750               0 :                         FreePageBtreeAdjustAncestorKeys(fpm, split_target);
    1751                 :                 }
    1752                 :                 else
    1753                 :                 {
    1754                 :                     Size        index;
    1755                 :                     FreePageBtree *insert_into;
    1756                 : 
    1757               0 :                     insert_into =
    1758               0 :                         key < newsibling->u.internal_key[0].first_page ?
    1759               0 :                         split_target : newsibling;
    1760               0 :                     index = FreePageBtreeSearchInternal(insert_into, key);
    1761               0 :                     FreePageBtreeInsertInternal(base, insert_into, index,
    1762                 :                                                 key, child);
    1763               0 :                     relptr_store(base, child->hdr.parent, insert_into);
    1764               0 :                     if (index == 0 && insert_into == split_target)
    1765               0 :                         FreePageBtreeAdjustAncestorKeys(fpm, split_target);
    1766                 :                 }
    1767                 : 
    1768                 :                 /* If the page we just split has no parent, split the root. */
    1769               0 :                 if (parent == NULL)
    1770                 :                 {
    1771                 :                     FreePageBtree *newroot;
    1772                 : 
    1773               0 :                     newroot = FreePageBtreeGetRecycled(fpm);
    1774               0 :                     newroot->hdr.magic = FREE_PAGE_INTERNAL_MAGIC;
    1775               0 :                     newroot->hdr.nused = 2;
    1776               0 :                     relptr_store(base, newroot->hdr.parent,
    1777                 :                                  (FreePageBtree *) NULL);
    1778               0 :                     newroot->u.internal_key[0].first_page =
    1779               0 :                         FreePageBtreeFirstKey(split_target);
    1780               0 :                     relptr_store(base, newroot->u.internal_key[0].child,
    1781                 :                                  split_target);
    1782               0 :                     relptr_store(base, split_target->hdr.parent, newroot);
    1783               0 :                     newroot->u.internal_key[1].first_page =
    1784               0 :                         FreePageBtreeFirstKey(newsibling);
    1785               0 :                     relptr_store(base, newroot->u.internal_key[1].child,
    1786                 :                                  newsibling);
    1787               0 :                     relptr_store(base, newsibling->hdr.parent, newroot);
    1788               0 :                     relptr_store(base, fpm->btree_root, newroot);
    1789               0 :                     fpm->btree_depth++;
    1790                 : 
    1791               0 :                     break;
    1792                 :                 }
    1793                 : 
    1794                 :                 /* If the parent page isn't full, insert the downlink. */
    1795               0 :                 key = newsibling->u.internal_key[0].first_page;
    1796               0 :                 if (parent->hdr.nused < FPM_ITEMS_PER_INTERNAL_PAGE)
    1797                 :                 {
    1798                 :                     Size        index;
    1799                 : 
    1800               0 :                     index = FreePageBtreeSearchInternal(parent, key);
    1801               0 :                     FreePageBtreeInsertInternal(base, parent, index,
    1802                 :                                                 key, newsibling);
    1803               0 :                     relptr_store(base, newsibling->hdr.parent, parent);
    1804               0 :                     if (index == 0)
    1805               0 :                         FreePageBtreeAdjustAncestorKeys(fpm, parent);
    1806               0 :                     break;
    1807                 :                 }
    1808                 : 
    1809                 :                 /* The parent also needs to be split, so loop around. */
    1810               0 :                 child = newsibling;
    1811               0 :                 split_target = parent;
    1812                 :             }
    1813                 : 
    1814                 :             /*
    1815                 :              * The loop above did the insert, so just need to update the free
    1816                 :              * list, and we're done.
    1817                 :              */
    1818               0 :             FreePagePushSpanLeader(fpm, first_page, npages);
    1819                 : 
    1820               0 :             return npages;
    1821                 :         }
    1822                 :     }
    1823                 : 
    1824                 :     /* Physically add the key to the page. */
    1825 CBC         604 :     Assert(result.page->hdr.nused < FPM_ITEMS_PER_LEAF_PAGE);
    1826             604 :     FreePageBtreeInsertLeaf(result.page, result.index, first_page, npages);
    1827                 : 
    1828                 :     /* If new first key on page, ancestors might need adjustment. */
    1829             604 :     if (result.index == 0)
    1830             329 :         FreePageBtreeAdjustAncestorKeys(fpm, result.page);
    1831                 : 
    1832                 :     /* Put it on the free list. */
    1833             604 :     FreePagePushSpanLeader(fpm, first_page, npages);
    1834                 : 
    1835             604 :     return npages;
    1836                 : }
    1837                 : 
    1838                 : /*
    1839                 :  * Remove a FreePageSpanLeader from the linked-list that contains it, either
    1840                 :  * because we're changing the size of the span, or because we're allocating it.
    1841                 :  */
    1842                 : static void
    1843            1903 : FreePagePopSpanLeader(FreePageManager *fpm, Size pageno)
    1844                 : {
    1845            1903 :     char       *base = fpm_segment_base(fpm);
    1846                 :     FreePageSpanLeader *span;
    1847                 :     FreePageSpanLeader *next;
    1848                 :     FreePageSpanLeader *prev;
    1849                 : 
    1850            1903 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, pageno);
    1851                 : 
    1852            1903 :     next = relptr_access(base, span->next);
    1853            1903 :     prev = relptr_access(base, span->prev);
    1854            1903 :     if (next != NULL)
    1855             121 :         relptr_copy(next->prev, span->prev);
    1856            1903 :     if (prev != NULL)
    1857              41 :         relptr_copy(prev->next, span->next);
    1858                 :     else
    1859                 :     {
    1860            1862 :         Size        f = Min(span->npages, FPM_NUM_FREELISTS) - 1;
    1861                 : 
    1862            1862 :         Assert(relptr_offset(fpm->freelist[f]) == pageno * FPM_PAGE_SIZE);
    1863            1862 :         relptr_copy(fpm->freelist[f], span->next);
    1864                 :     }
    1865            1903 : }
    1866                 : 
    1867                 : /*
    1868                 :  * Initialize a new FreePageSpanLeader and put it on the appropriate free list.
    1869                 :  */
    1870                 : static void
    1871           21263 : FreePagePushSpanLeader(FreePageManager *fpm, Size first_page, Size npages)
    1872                 : {
    1873           21263 :     char       *base = fpm_segment_base(fpm);
    1874           21263 :     Size        f = Min(npages, FPM_NUM_FREELISTS) - 1;
    1875           21263 :     FreePageSpanLeader *head = relptr_access(base, fpm->freelist[f]);
    1876                 :     FreePageSpanLeader *span;
    1877                 : 
    1878           21263 :     span = (FreePageSpanLeader *) fpm_page_to_pointer(base, first_page);
    1879           21263 :     span->magic = FREE_PAGE_SPAN_LEADER_MAGIC;
    1880           21263 :     span->npages = npages;
    1881           21263 :     relptr_store(base, span->next, head);
    1882           21263 :     relptr_store(base, span->prev, (FreePageSpanLeader *) NULL);
    1883           21263 :     if (head != NULL)
    1884             265 :         relptr_store(base, head->prev, span);
    1885           21263 :     relptr_store(base, fpm->freelist[f], span);
    1886           21263 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a