LCOV - differential code coverage report
Current view: top level - src/backend/access/nbtree - nbtsplitloc.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 95.8 % 287 275 12 275
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 13 13 13
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * nbtsplitloc.c
       4                 :  *    Choose split point code for Postgres btree implementation.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/access/nbtree/nbtsplitloc.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "access/nbtree.h"
      18                 : #include "storage/lmgr.h"
      19                 : 
      20                 : typedef enum
      21                 : {
      22                 :     /* strategy for searching through materialized list of split points */
      23                 :     SPLIT_DEFAULT,              /* give some weight to truncation */
      24                 :     SPLIT_MANY_DUPLICATES,      /* find minimally distinguishing point */
      25                 :     SPLIT_SINGLE_VALUE          /* leave left page almost full */
      26                 : } FindSplitStrat;
      27                 : 
      28                 : typedef struct
      29                 : {
      30                 :     /* details of free space left by split */
      31                 :     int16       curdelta;       /* current leftfree/rightfree delta */
      32                 :     int16       leftfree;       /* space left on left page post-split */
      33                 :     int16       rightfree;      /* space left on right page post-split */
      34                 : 
      35                 :     /* split point identifying fields (returned by _bt_findsplitloc) */
      36                 :     OffsetNumber firstrightoff; /* first origpage item on rightpage */
      37                 :     bool        newitemonleft;  /* new item goes on left, or right? */
      38                 : } SplitPoint;
      39                 : 
      40                 : typedef struct
      41                 : {
      42                 :     /* context data for _bt_recsplitloc */
      43                 :     Relation    rel;            /* index relation */
      44                 :     Page        origpage;       /* page undergoing split */
      45                 :     IndexTuple  newitem;        /* new item (cause of page split) */
      46                 :     Size        newitemsz;      /* size of newitem (includes line pointer) */
      47                 :     bool        is_leaf;        /* T if splitting a leaf page */
      48                 :     bool        is_rightmost;   /* T if splitting rightmost page on level */
      49                 :     OffsetNumber newitemoff;    /* where the new item is to be inserted */
      50                 :     int         leftspace;      /* space available for items on left page */
      51                 :     int         rightspace;     /* space available for items on right page */
      52                 :     int         olddataitemstotal;  /* space taken by old items */
      53                 :     Size        minfirstrightsz;    /* smallest firstright size */
      54                 : 
      55                 :     /* candidate split point data */
      56                 :     int         maxsplits;      /* maximum number of splits */
      57                 :     int         nsplits;        /* current number of splits */
      58                 :     SplitPoint *splits;         /* all candidate split points for page */
      59                 :     int         interval;       /* current range of acceptable split points */
      60                 : } FindSplitData;
      61                 : 
      62                 : static void _bt_recsplitloc(FindSplitData *state,
      63                 :                             OffsetNumber firstrightoff, bool newitemonleft,
      64                 :                             int olddataitemstoleft,
      65                 :                             Size firstrightofforigpagetuplesz);
      66                 : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
      67                 :                                 bool usemult);
      68                 : static int  _bt_splitcmp(const void *arg1, const void *arg2);
      69                 : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
      70                 :                                 int leaffillfactor, bool *usemult);
      71                 : static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
      72                 : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
      73                 :                                      bool *newitemonleft, FindSplitStrat strategy);
      74                 : static int  _bt_defaultinterval(FindSplitData *state);
      75                 : static int  _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
      76                 :                          SplitPoint *rightpage, FindSplitStrat *strategy);
      77                 : static void _bt_interval_edges(FindSplitData *state,
      78                 :                                SplitPoint **leftinterval, SplitPoint **rightinterval);
      79                 : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
      80                 : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
      81                 :                                             SplitPoint *split);
      82                 : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
      83                 :                                               SplitPoint *split);
      84                 : 
      85                 : 
      86                 : /*
      87                 :  *  _bt_findsplitloc() -- find an appropriate place to split a page.
      88                 :  *
      89                 :  * The main goal here is to equalize the free space that will be on each
      90                 :  * split page, *after accounting for the inserted tuple*.  (If we fail to
      91                 :  * account for it, we might find ourselves with too little room on the page
      92                 :  * that it needs to go into!)
      93                 :  *
      94                 :  * If the page is the rightmost page on its level, we instead try to arrange
      95                 :  * to leave the left split page fillfactor% full.  In this way, when we are
      96                 :  * inserting successively increasing keys (consider sequences, timestamps,
      97                 :  * etc) we will end up with a tree whose pages are about fillfactor% full,
      98                 :  * instead of the 50% full result that we'd get without this special case.
      99                 :  * This is the same as nbtsort.c produces for a newly-created tree.  Note
     100                 :  * that leaf and nonleaf pages use different fillfactors.  Note also that
     101                 :  * there are a number of further special cases where fillfactor is not
     102                 :  * applied in the standard way.
     103                 :  *
     104                 :  * We are passed the intended insert position of the new tuple, expressed as
     105                 :  * the offsetnumber of the tuple it must go in front of (this could be
     106                 :  * maxoff+1 if the tuple is to go at the end).  The new tuple itself is also
     107                 :  * passed, since it's needed to give some weight to how effective suffix
     108                 :  * truncation will be.  The implementation picks the split point that
     109                 :  * maximizes the effectiveness of suffix truncation from a small list of
     110                 :  * alternative candidate split points that leave each side of the split with
     111                 :  * about the same share of free space.  Suffix truncation is secondary to
     112                 :  * equalizing free space, except in cases with large numbers of duplicates.
     113                 :  * Note that it is always assumed that caller goes on to perform truncation,
     114                 :  * even with pg_upgrade'd indexes where that isn't actually the case
     115                 :  * (!heapkeyspace indexes).  See nbtree/README for more information about
     116                 :  * suffix truncation.
     117                 :  *
     118                 :  * We return the index of the first existing tuple that should go on the
     119                 :  * righthand page (which is called firstrightoff), plus a boolean
     120                 :  * indicating whether the new tuple goes on the left or right page.  You
     121                 :  * can think of the returned state as a point _between_ two adjacent data
     122                 :  * items (laftleft and firstright data items) on an imaginary version of
     123                 :  * origpage that already includes newitem.  The bool is necessary to
     124                 :  * disambiguate the case where firstrightoff == newitemoff (i.e. it is
     125                 :  * sometimes needed to determine if the firstright tuple for the split is
     126                 :  * newitem rather than the tuple from origpage at offset firstrightoff).
     127                 :  */
     128                 : OffsetNumber
     129 CBC       23479 : _bt_findsplitloc(Relation rel,
     130                 :                  Page origpage,
     131                 :                  OffsetNumber newitemoff,
     132                 :                  Size newitemsz,
     133                 :                  IndexTuple newitem,
     134                 :                  bool *newitemonleft)
     135                 : {
     136                 :     BTPageOpaque opaque;
     137                 :     int         leftspace,
     138                 :                 rightspace,
     139                 :                 olddataitemstotal,
     140                 :                 olddataitemstoleft,
     141                 :                 perfectpenalty,
     142                 :                 leaffillfactor;
     143                 :     FindSplitData state;
     144                 :     FindSplitStrat strategy;
     145                 :     ItemId      itemid;
     146                 :     OffsetNumber offnum,
     147                 :                 maxoff,
     148                 :                 firstrightoff;
     149                 :     double      fillfactormult;
     150                 :     bool        usemult;
     151                 :     SplitPoint  leftpage,
     152                 :                 rightpage;
     153                 : 
     154           23479 :     opaque = BTPageGetOpaque(origpage);
     155           23479 :     maxoff = PageGetMaxOffsetNumber(origpage);
     156                 : 
     157                 :     /* Total free space available on a btree page, after fixed overhead */
     158           23479 :     leftspace = rightspace =
     159           23479 :         PageGetPageSize(origpage) - SizeOfPageHeaderData -
     160                 :         MAXALIGN(sizeof(BTPageOpaqueData));
     161                 : 
     162                 :     /* The right page will have the same high key as the old page */
     163           23479 :     if (!P_RIGHTMOST(opaque))
     164                 :     {
     165           11299 :         itemid = PageGetItemId(origpage, P_HIKEY);
     166           11299 :         rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
     167                 :                              sizeof(ItemIdData));
     168                 :     }
     169                 : 
     170                 :     /* Count up total space in data items before actually scanning 'em */
     171           23479 :     olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
     172           23479 :     leaffillfactor = BTGetFillFactor(rel);
     173                 : 
     174                 :     /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
     175           23479 :     newitemsz += sizeof(ItemIdData);
     176           23479 :     state.rel = rel;
     177           23479 :     state.origpage = origpage;
     178           23479 :     state.newitem = newitem;
     179           23479 :     state.newitemsz = newitemsz;
     180           23479 :     state.is_leaf = P_ISLEAF(opaque);
     181           23479 :     state.is_rightmost = P_RIGHTMOST(opaque);
     182           23479 :     state.leftspace = leftspace;
     183           23479 :     state.rightspace = rightspace;
     184           23479 :     state.olddataitemstotal = olddataitemstotal;
     185           23479 :     state.minfirstrightsz = SIZE_MAX;
     186           23479 :     state.newitemoff = newitemoff;
     187                 : 
     188                 :     /* newitem cannot be a posting list item */
     189           23479 :     Assert(!BTreeTupleIsPosting(newitem));
     190                 : 
     191                 :     /*
     192                 :      * nsplits should never exceed maxoff because there will be at most as
     193                 :      * many candidate split points as there are points _between_ tuples, once
     194                 :      * you imagine that the new item is already on the original page (the
     195                 :      * final number of splits may be slightly lower because not all points
     196                 :      * between tuples will be legal).
     197                 :      */
     198           23479 :     state.maxsplits = maxoff;
     199           23479 :     state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
     200           23479 :     state.nsplits = 0;
     201                 : 
     202                 :     /*
     203                 :      * Scan through the data items and calculate space usage for a split at
     204                 :      * each possible position
     205                 :      */
     206           23479 :     olddataitemstoleft = 0;
     207                 : 
     208           23479 :     for (offnum = P_FIRSTDATAKEY(opaque);
     209         6963551 :          offnum <= maxoff;
     210         6940072 :          offnum = OffsetNumberNext(offnum))
     211                 :     {
     212                 :         Size        itemsz;
     213                 : 
     214         6940072 :         itemid = PageGetItemId(origpage, offnum);
     215         6940072 :         itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
     216                 : 
     217                 :         /*
     218                 :          * When item offset number is not newitemoff, neither side of the
     219                 :          * split can be newitem.  Record a split after the previous data item
     220                 :          * from original page, but before the current data item from original
     221                 :          * page. (_bt_recsplitloc() will reject the split when there are no
     222                 :          * previous items, which we rely on.)
     223                 :          */
     224         6940072 :         if (offnum < newitemoff)
     225         5842433 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     226         1097639 :         else if (offnum > newitemoff)
     227         1083856 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     228                 :         else
     229                 :         {
     230                 :             /*
     231                 :              * Record a split after all "offnum < newitemoff" original page
     232                 :              * data items, but before newitem
     233                 :              */
     234           13783 :             _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
     235                 : 
     236                 :             /*
     237                 :              * Record a split after newitem, but before data item from
     238                 :              * original page at offset newitemoff/current offset
     239                 :              */
     240           13783 :             _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
     241                 :         }
     242                 : 
     243         6940072 :         olddataitemstoleft += itemsz;
     244                 :     }
     245                 : 
     246                 :     /*
     247                 :      * Record a split after all original page data items, but before newitem.
     248                 :      * (Though only when it's possible that newitem will end up alone on new
     249                 :      * right page.)
     250                 :      */
     251           23479 :     Assert(olddataitemstoleft == olddataitemstotal);
     252           23479 :     if (newitemoff > maxoff)
     253            9696 :         _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
     254                 : 
     255                 :     /*
     256                 :      * I believe it is not possible to fail to find a feasible split, but just
     257                 :      * in case ...
     258                 :      */
     259           23479 :     if (state.nsplits == 0)
     260 UBC           0 :         elog(ERROR, "could not find a feasible split point for index \"%s\"",
     261                 :              RelationGetRelationName(rel));
     262                 : 
     263                 :     /*
     264                 :      * Start search for a split point among list of legal split points.  Give
     265                 :      * primary consideration to equalizing available free space in each half
     266                 :      * of the split initially (start with default strategy), while applying
     267                 :      * rightmost and split-after-new-item optimizations where appropriate.
     268                 :      * Either of the two other fallback strategies may be required for cases
     269                 :      * with a large number of duplicates around the original/space-optimal
     270                 :      * split point.
     271                 :      *
     272                 :      * Default strategy gives some weight to suffix truncation in deciding a
     273                 :      * split point on leaf pages.  It attempts to select a split point where a
     274                 :      * distinguishing attribute appears earlier in the new high key for the
     275                 :      * left side of the split, in order to maximize the number of trailing
     276                 :      * attributes that can be truncated away.  Only candidate split points
     277                 :      * that imply an acceptable balance of free space on each side are
     278                 :      * considered.  See _bt_defaultinterval().
     279                 :      */
     280 CBC       23479 :     if (!state.is_leaf)
     281                 :     {
     282                 :         /* fillfactormult only used on rightmost page */
     283             101 :         usemult = state.is_rightmost;
     284             101 :         fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
     285                 :     }
     286           23378 :     else if (state.is_rightmost)
     287                 :     {
     288                 :         /* Rightmost leaf page --  fillfactormult always used */
     289           12104 :         usemult = true;
     290           12104 :         fillfactormult = leaffillfactor / 100.0;
     291                 :     }
     292           11274 :     else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
     293                 :     {
     294                 :         /*
     295                 :          * New item inserted at rightmost point among a localized grouping on
     296                 :          * a leaf page -- apply "split after new item" optimization, either by
     297                 :          * applying leaf fillfactor multiplier, or by choosing the exact split
     298                 :          * point that leaves newitem as lastleft. (usemult is set for us.)
     299                 :          */
     300             630 :         if (usemult)
     301                 :         {
     302                 :             /* fillfactormult should be set based on leaf fillfactor */
     303             544 :             fillfactormult = leaffillfactor / 100.0;
     304                 :         }
     305                 :         else
     306                 :         {
     307                 :             /* find precise split point after newitemoff */
     308           21267 :             for (int i = 0; i < state.nsplits; i++)
     309                 :             {
     310           21267 :                 SplitPoint *split = state.splits + i;
     311                 : 
     312           21267 :                 if (split->newitemonleft &&
     313              86 :                     newitemoff == split->firstrightoff)
     314                 :                 {
     315              86 :                     pfree(state.splits);
     316              86 :                     *newitemonleft = true;
     317              86 :                     return newitemoff;
     318                 :                 }
     319                 :             }
     320                 : 
     321                 :             /*
     322                 :              * Cannot legally split after newitemoff; proceed with split
     323                 :              * without using fillfactor multiplier.  This is defensive, and
     324                 :              * should never be needed in practice.
     325                 :              */
     326 UBC           0 :             fillfactormult = 0.50;
     327                 :         }
     328                 :     }
     329                 :     else
     330                 :     {
     331                 :         /* Other leaf page.  50:50 page split. */
     332 CBC       10644 :         usemult = false;
     333                 :         /* fillfactormult not used, but be tidy */
     334           10644 :         fillfactormult = 0.50;
     335                 :     }
     336                 : 
     337                 :     /*
     338                 :      * Save leftmost and rightmost splits for page before original ordinal
     339                 :      * sort order is lost by delta/fillfactormult sort
     340                 :      */
     341           23393 :     leftpage = state.splits[0];
     342           23393 :     rightpage = state.splits[state.nsplits - 1];
     343                 : 
     344                 :     /* Give split points a fillfactormult-wise delta, and sort on deltas */
     345           23393 :     _bt_deltasortsplits(&state, fillfactormult, usemult);
     346                 : 
     347                 :     /* Determine split interval for default strategy */
     348           23393 :     state.interval = _bt_defaultinterval(&state);
     349                 : 
     350                 :     /*
     351                 :      * Determine if default strategy/split interval will produce a
     352                 :      * sufficiently distinguishing split, or if we should change strategies.
     353                 :      * Alternative strategies change the range of split points that are
     354                 :      * considered acceptable (split interval), and possibly change
     355                 :      * fillfactormult, in order to deal with pages with a large number of
     356                 :      * duplicates gracefully.
     357                 :      *
     358                 :      * Pass low and high splits for the entire page (actually, they're for an
     359                 :      * imaginary version of the page that includes newitem).  These are used
     360                 :      * when the initial split interval encloses split points that are full of
     361                 :      * duplicates, and we need to consider if it's even possible to avoid
     362                 :      * appending a heap TID.
     363                 :      */
     364           23393 :     perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
     365                 : 
     366           23393 :     if (strategy == SPLIT_DEFAULT)
     367                 :     {
     368                 :         /*
     369                 :          * Default strategy worked out (always works out with internal page).
     370                 :          * Original split interval still stands.
     371                 :          */
     372                 :     }
     373                 : 
     374                 :     /*
     375                 :      * Many duplicates strategy is used when a heap TID would otherwise be
     376                 :      * appended, but the page isn't completely full of logical duplicates.
     377                 :      *
     378                 :      * The split interval is widened to include all legal candidate split
     379                 :      * points.  There might be a few as two distinct values in the whole-page
     380                 :      * split interval, though it's also possible that most of the values on
     381                 :      * the page are unique.  The final split point will either be to the
     382                 :      * immediate left or to the immediate right of the group of duplicate
     383                 :      * tuples that enclose the first/delta-optimal split point (perfect
     384                 :      * penalty was set so that the lowest delta split point that avoids
     385                 :      * appending a heap TID will be chosen).  Maximizing the number of
     386                 :      * attributes that can be truncated away is not a goal of the many
     387                 :      * duplicates strategy.
     388                 :      *
     389                 :      * Single value strategy is used when it is impossible to avoid appending
     390                 :      * a heap TID.  It arranges to leave the left page very full.  This
     391                 :      * maximizes space utilization in cases where tuples with the same
     392                 :      * attribute values span many pages.  Newly inserted duplicates will tend
     393                 :      * to have higher heap TID values, so we'll end up splitting to the right
     394                 :      * consistently.  (Single value strategy is harmless though not
     395                 :      * particularly useful with !heapkeyspace indexes.)
     396                 :      */
     397             459 :     else if (strategy == SPLIT_MANY_DUPLICATES)
     398                 :     {
     399             325 :         Assert(state.is_leaf);
     400                 :         /* Shouldn't try to truncate away extra user attributes */
     401             325 :         Assert(perfectpenalty ==
     402                 :                IndexRelationGetNumberOfKeyAttributes(state.rel));
     403                 :         /* No need to resort splits -- no change in fillfactormult/deltas */
     404             325 :         state.interval = state.nsplits;
     405                 :     }
     406             134 :     else if (strategy == SPLIT_SINGLE_VALUE)
     407                 :     {
     408             134 :         Assert(state.is_leaf);
     409                 :         /* Split near the end of the page */
     410             134 :         usemult = true;
     411             134 :         fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
     412                 :         /* Resort split points with new delta */
     413             134 :         _bt_deltasortsplits(&state, fillfactormult, usemult);
     414                 :         /* Appending a heap TID is unavoidable, so interval of 1 is fine */
     415             134 :         state.interval = 1;
     416                 :     }
     417                 : 
     418                 :     /*
     419                 :      * Search among acceptable split points (using final split interval) for
     420                 :      * the entry that has the lowest penalty, and is therefore expected to
     421                 :      * maximize fan-out.  Sets *newitemonleft for us.
     422                 :      */
     423           23393 :     firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
     424                 :                                      strategy);
     425           23393 :     pfree(state.splits);
     426                 : 
     427           23393 :     return firstrightoff;
     428                 : }
     429                 : 
     430                 : /*
     431                 :  * Subroutine to record a particular point between two tuples (possibly the
     432                 :  * new item) on page (ie, combination of firstrightoff and newitemonleft
     433                 :  * settings) in *state for later analysis.  This is also a convenient point to
     434                 :  * check if the split is legal (if it isn't, it won't be recorded).
     435                 :  *
     436                 :  * firstrightoff is the offset of the first item on the original page that
     437                 :  * goes to the right page, and firstrightofforigpagetuplesz is the size of
     438                 :  * that tuple.  firstrightoff can be > max offset, which means that all the
     439                 :  * old items go to the left page and only the new item goes to the right page.
     440                 :  * We don't actually use firstrightofforigpagetuplesz in that case (actually,
     441                 :  * we don't use it for _any_ split where the firstright tuple happens to be
     442                 :  * newitem).
     443                 :  *
     444                 :  * olddataitemstoleft is the total size of all old items to the left of the
     445                 :  * split point that is recorded here when legal.  Should not include
     446                 :  * newitemsz, since that is handled here.
     447                 :  */
     448                 : static void
     449         6963551 : _bt_recsplitloc(FindSplitData *state,
     450                 :                 OffsetNumber firstrightoff,
     451                 :                 bool newitemonleft,
     452                 :                 int olddataitemstoleft,
     453                 :                 Size firstrightofforigpagetuplesz)
     454                 : {
     455                 :     int16       leftfree,
     456                 :                 rightfree;
     457                 :     Size        firstrightsz;
     458         6963551 :     Size        postingsz = 0;
     459                 :     bool        newitemisfirstright;
     460                 : 
     461                 :     /* Is the new item going to be split point's firstright tuple? */
     462         7000813 :     newitemisfirstright = (firstrightoff == state->newitemoff &&
     463           37262 :                            !newitemonleft);
     464                 : 
     465         6963551 :     if (newitemisfirstright)
     466           23479 :         firstrightsz = state->newitemsz;
     467                 :     else
     468                 :     {
     469         6940072 :         firstrightsz = firstrightofforigpagetuplesz;
     470                 : 
     471                 :         /*
     472                 :          * Calculate suffix truncation space saving when firstright tuple is a
     473                 :          * posting list tuple, though only when the tuple is over 64 bytes
     474                 :          * including line pointer overhead (arbitrary).  This avoids accessing
     475                 :          * the tuple in cases where its posting list must be very small (if
     476                 :          * tuple has one at all).
     477                 :          *
     478                 :          * Note: We don't do this in the case where firstright tuple is
     479                 :          * newitem, since newitem cannot have a posting list.
     480                 :          */
     481         6940072 :         if (state->is_leaf && firstrightsz > 64)
     482                 :         {
     483                 :             ItemId      itemid;
     484                 :             IndexTuple  newhighkey;
     485                 : 
     486           34554 :             itemid = PageGetItemId(state->origpage, firstrightoff);
     487           34554 :             newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
     488                 : 
     489           34554 :             if (BTreeTupleIsPosting(newhighkey))
     490           47272 :                 postingsz = IndexTupleSize(newhighkey) -
     491           23636 :                     BTreeTupleGetPostingOffset(newhighkey);
     492                 :         }
     493                 :     }
     494                 : 
     495                 :     /* Account for all the old tuples */
     496         6963551 :     leftfree = state->leftspace - olddataitemstoleft;
     497         6963551 :     rightfree = state->rightspace -
     498         6963551 :         (state->olddataitemstotal - olddataitemstoleft);
     499                 : 
     500                 :     /*
     501                 :      * The first item on the right page becomes the high key of the left page;
     502                 :      * therefore it counts against left space as well as right space (we
     503                 :      * cannot assume that suffix truncation will make it any smaller).  When
     504                 :      * index has included attributes, then those attributes of left page high
     505                 :      * key will be truncated leaving that page with slightly more free space.
     506                 :      * However, that shouldn't affect our ability to find valid split
     507                 :      * location, since we err in the direction of being pessimistic about free
     508                 :      * space on the left half.  Besides, even when suffix truncation of
     509                 :      * non-TID attributes occurs, the new high key often won't even be a
     510                 :      * single MAXALIGN() quantum smaller than the firstright tuple it's based
     511                 :      * on.
     512                 :      *
     513                 :      * If we are on the leaf level, assume that suffix truncation cannot avoid
     514                 :      * adding a heap TID to the left half's new high key when splitting at the
     515                 :      * leaf level.  In practice the new high key will often be smaller and
     516                 :      * will rarely be larger, but conservatively assume the worst case.  We do
     517                 :      * go to the trouble of subtracting away posting list overhead, though
     518                 :      * only when it looks like it will make an appreciable difference.
     519                 :      * (Posting lists are the only case where truncation will typically make
     520                 :      * the final high key far smaller than firstright, so being a bit more
     521                 :      * precise there noticeably improves the balance of free space.)
     522                 :      */
     523         6963551 :     if (state->is_leaf)
     524         6961659 :         leftfree -= (int16) (firstrightsz +
     525         6961659 :                              MAXALIGN(sizeof(ItemPointerData)) -
     526                 :                              postingsz);
     527                 :     else
     528            1892 :         leftfree -= (int16) firstrightsz;
     529                 : 
     530                 :     /* account for the new item */
     531         6963551 :     if (newitemonleft)
     532         1097639 :         leftfree -= (int16) state->newitemsz;
     533                 :     else
     534         5865912 :         rightfree -= (int16) state->newitemsz;
     535                 : 
     536                 :     /*
     537                 :      * If we are not on the leaf level, we will be able to discard the key
     538                 :      * data from the first item that winds up on the right page.
     539                 :      */
     540         6963551 :     if (!state->is_leaf)
     541            1892 :         rightfree += (int16) firstrightsz -
     542                 :             (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
     543                 : 
     544                 :     /* Record split if legal */
     545         6963551 :     if (leftfree >= 0 && rightfree >= 0)
     546                 :     {
     547         6916323 :         Assert(state->nsplits < state->maxsplits);
     548                 : 
     549                 :         /* Determine smallest firstright tuple size among legal splits */
     550         6916323 :         state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
     551                 : 
     552         6916323 :         state->splits[state->nsplits].curdelta = 0;
     553         6916323 :         state->splits[state->nsplits].leftfree = leftfree;
     554         6916323 :         state->splits[state->nsplits].rightfree = rightfree;
     555         6916323 :         state->splits[state->nsplits].firstrightoff = firstrightoff;
     556         6916323 :         state->splits[state->nsplits].newitemonleft = newitemonleft;
     557         6916323 :         state->nsplits++;
     558                 :     }
     559         6963551 : }
     560                 : 
     561                 : /*
     562                 :  * Subroutine to assign space deltas to materialized array of candidate split
     563                 :  * points based on current fillfactor, and to sort array using that fillfactor
     564                 :  */
     565                 : static void
     566           23527 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
     567                 :                     bool usemult)
     568                 : {
     569         6909689 :     for (int i = 0; i < state->nsplits; i++)
     570                 :     {
     571         6886162 :         SplitPoint *split = state->splits + i;
     572                 :         int16       delta;
     573                 : 
     574         6886162 :         if (usemult)
     575         4115154 :             delta = fillfactormult * split->leftfree -
     576         4115154 :                 (1.0 - fillfactormult) * split->rightfree;
     577                 :         else
     578         2771008 :             delta = split->leftfree - split->rightfree;
     579                 : 
     580         6886162 :         if (delta < 0)
     581         1785017 :             delta = -delta;
     582                 : 
     583                 :         /* Save delta */
     584         6886162 :         split->curdelta = delta;
     585                 :     }
     586                 : 
     587           23527 :     qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
     588           23527 : }
     589                 : 
     590                 : /*
     591                 :  * qsort-style comparator used by _bt_deltasortsplits()
     592                 :  */
     593                 : static int
     594        73252432 : _bt_splitcmp(const void *arg1, const void *arg2)
     595                 : {
     596        73252432 :     SplitPoint *split1 = (SplitPoint *) arg1;
     597        73252432 :     SplitPoint *split2 = (SplitPoint *) arg2;
     598                 : 
     599        73252432 :     if (split1->curdelta > split2->curdelta)
     600        28609584 :         return 1;
     601        44642848 :     if (split1->curdelta < split2->curdelta)
     602        44523761 :         return -1;
     603                 : 
     604          119087 :     return 0;
     605                 : }
     606                 : 
     607                 : /*
     608                 :  * Subroutine to determine whether or not a non-rightmost leaf page should be
     609                 :  * split immediately after the would-be original page offset for the
     610                 :  * new/incoming tuple (or should have leaf fillfactor applied when new item is
     611                 :  * to the right on original page).  This is appropriate when there is a
     612                 :  * pattern of localized monotonically increasing insertions into a composite
     613                 :  * index, where leading attribute values form local groupings, and we
     614                 :  * anticipate further insertions of the same/current grouping (new item's
     615                 :  * grouping) in the near future.  This can be thought of as a variation on
     616                 :  * applying leaf fillfactor during rightmost leaf page splits, since cases
     617                 :  * that benefit will converge on packing leaf pages leaffillfactor% full over
     618                 :  * time.
     619                 :  *
     620                 :  * We may leave extra free space remaining on the rightmost page of a "most
     621                 :  * significant column" grouping of tuples if that grouping never ends up
     622                 :  * having future insertions that use the free space.  That effect is
     623                 :  * self-limiting; a future grouping that becomes the "nearest on the right"
     624                 :  * grouping of the affected grouping usually puts the extra free space to good
     625                 :  * use.
     626                 :  *
     627                 :  * Caller uses optimization when routine returns true, though the exact action
     628                 :  * taken by caller varies.  Caller uses original leaf page fillfactor in
     629                 :  * standard way rather than using the new item offset directly when *usemult
     630                 :  * was also set to true here.  Otherwise, caller applies optimization by
     631                 :  * locating the legal split point that makes the new tuple the lastleft tuple
     632                 :  * for the split.
     633                 :  */
     634                 : static bool
     635           11274 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
     636                 :                     int leaffillfactor, bool *usemult)
     637                 : {
     638                 :     int16       nkeyatts;
     639                 :     ItemId      itemid;
     640                 :     IndexTuple  tup;
     641                 :     int         keepnatts;
     642                 : 
     643           11274 :     Assert(state->is_leaf && !state->is_rightmost);
     644                 : 
     645           11274 :     nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     646                 : 
     647                 :     /* Single key indexes not considered here */
     648           11274 :     if (nkeyatts == 1)
     649             585 :         return false;
     650                 : 
     651                 :     /* Ascending insertion pattern never inferred when new item is first */
     652           10689 :     if (state->newitemoff == P_FIRSTKEY)
     653               1 :         return false;
     654                 : 
     655                 :     /*
     656                 :      * Only apply optimization on pages with equisized tuples, since ordinal
     657                 :      * keys are likely to be fixed-width.  Testing if the new tuple is
     658                 :      * variable width directly might also work, but that fails to apply the
     659                 :      * optimization to indexes with a numeric_ops attribute.
     660                 :      *
     661                 :      * Conclude that page has equisized tuples when the new item is the same
     662                 :      * width as the smallest item observed during pass over page, and other
     663                 :      * non-pivot tuples must be the same width as well.  (Note that the
     664                 :      * possibly-truncated existing high key isn't counted in
     665                 :      * olddataitemstotal, and must be subtracted from maxoff.)
     666                 :      */
     667           10688 :     if (state->newitemsz != state->minfirstrightsz)
     668            3418 :         return false;
     669            7270 :     if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
     670            4185 :         return false;
     671                 : 
     672                 :     /*
     673                 :      * Avoid applying optimization when tuples are wider than a tuple
     674                 :      * consisting of two non-NULL int8/int64 attributes (or four non-NULL
     675                 :      * int4/int32 attributes)
     676                 :      */
     677            3085 :     if (state->newitemsz >
     678                 :         MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
     679                 :         sizeof(ItemIdData))
     680 UBC           0 :         return false;
     681                 : 
     682                 :     /*
     683                 :      * At least the first attribute's value must be equal to the corresponding
     684                 :      * value in previous tuple to apply optimization.  New item cannot be a
     685                 :      * duplicate, either.
     686                 :      *
     687                 :      * Handle case where new item is to the right of all items on the existing
     688                 :      * page.  This is suggestive of monotonically increasing insertions in
     689                 :      * itself, so the "heap TID adjacency" test is not applied here.
     690                 :      */
     691 CBC        3085 :     if (state->newitemoff > maxoff)
     692                 :     {
     693             525 :         itemid = PageGetItemId(state->origpage, maxoff);
     694             525 :         tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     695             525 :         keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     696                 : 
     697             525 :         if (keepnatts > 1 && keepnatts <= nkeyatts)
     698                 :         {
     699             525 :             *usemult = true;
     700             525 :             return true;
     701                 :         }
     702                 : 
     703 UBC           0 :         return false;
     704                 :     }
     705                 : 
     706                 :     /*
     707                 :      * "Low cardinality leading column, high cardinality suffix column"
     708                 :      * indexes with a random insertion pattern (e.g., an index with a boolean
     709                 :      * column, such as an index on '(book_is_in_print, book_isbn)') present us
     710                 :      * with a risk of consistently misapplying the optimization.  We're
     711                 :      * willing to accept very occasional misapplication of the optimization,
     712                 :      * provided the cases where we get it wrong are rare and self-limiting.
     713                 :      *
     714                 :      * Heap TID adjacency strongly suggests that the item just to the left was
     715                 :      * inserted very recently, which limits overapplication of the
     716                 :      * optimization.  Besides, all inappropriate cases triggered here will
     717                 :      * still split in the middle of the page on average.
     718                 :      */
     719 CBC        2560 :     itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
     720            2560 :     tup = (IndexTuple) PageGetItem(state->origpage, itemid);
     721                 :     /* Do cheaper test first */
     722            2560 :     if (BTreeTupleIsPosting(tup) ||
     723            2560 :         !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
     724            2442 :         return false;
     725                 :     /* Check same conditions as rightmost item case, too */
     726             118 :     keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
     727                 : 
     728             118 :     if (keepnatts > 1 && keepnatts <= nkeyatts)
     729                 :     {
     730             105 :         double      interp = (double) state->newitemoff / ((double) maxoff + 1);
     731             105 :         double      leaffillfactormult = (double) leaffillfactor / 100.0;
     732                 : 
     733                 :         /*
     734                 :          * Don't allow caller to split after a new item when it will result in
     735                 :          * a split point to the right of the point that a leaf fillfactor
     736                 :          * split would use -- have caller apply leaf fillfactor instead
     737                 :          */
     738             105 :         *usemult = interp > leaffillfactormult;
     739                 : 
     740             105 :         return true;
     741                 :     }
     742                 : 
     743              13 :     return false;
     744                 : }
     745                 : 
     746                 : /*
     747                 :  * Subroutine for determining if two heap TIDS are "adjacent".
     748                 :  *
     749                 :  * Adjacent means that the high TID is very likely to have been inserted into
     750                 :  * heap relation immediately after the low TID, probably during the current
     751                 :  * transaction.
     752                 :  */
     753                 : static bool
     754            2560 : _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
     755                 : {
     756                 :     BlockNumber lowblk,
     757                 :                 highblk;
     758                 : 
     759            2560 :     lowblk = ItemPointerGetBlockNumber(lowhtid);
     760            2560 :     highblk = ItemPointerGetBlockNumber(highhtid);
     761                 : 
     762                 :     /* Make optimistic assumption of adjacency when heap blocks match */
     763            2560 :     if (lowblk == highblk)
     764             118 :         return true;
     765                 : 
     766                 :     /* When heap block one up, second offset should be FirstOffsetNumber */
     767            3664 :     if (lowblk + 1 == highblk &&
     768            1222 :         ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
     769 UBC           0 :         return true;
     770                 : 
     771 CBC        2442 :     return false;
     772                 : }
     773                 : 
     774                 : /*
     775                 :  * Subroutine to find the "best" split point among candidate split points.
     776                 :  * The best split point is the split point with the lowest penalty among split
     777                 :  * points that fall within current/final split interval.  Penalty is an
     778                 :  * abstract score, with a definition that varies depending on whether we're
     779                 :  * splitting a leaf page or an internal page.  See _bt_split_penalty() for
     780                 :  * details.
     781                 :  *
     782                 :  * "perfectpenalty" is assumed to be the lowest possible penalty among
     783                 :  * candidate split points.  This allows us to return early without wasting
     784                 :  * cycles on calculating the first differing attribute for all candidate
     785                 :  * splits when that clearly cannot improve our choice (or when we only want a
     786                 :  * minimally distinguishing split point, and don't want to make the split any
     787                 :  * more unbalanced than is necessary).
     788                 :  *
     789                 :  * We return the index of the first existing tuple that should go on the right
     790                 :  * page, plus a boolean indicating if new item is on left of split point.
     791                 :  */
     792                 : static OffsetNumber
     793           23393 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
     794                 :                  bool *newitemonleft, FindSplitStrat strategy)
     795                 : {
     796                 :     int         bestpenalty,
     797                 :                 lowsplit;
     798           23393 :     int         highsplit = Min(state->interval, state->nsplits);
     799                 :     SplitPoint *final;
     800                 : 
     801           23393 :     bestpenalty = INT_MAX;
     802           23393 :     lowsplit = 0;
     803           69290 :     for (int i = lowsplit; i < highsplit; i++)
     804                 :     {
     805                 :         int         penalty;
     806                 : 
     807           69271 :         penalty = _bt_split_penalty(state, state->splits + i);
     808                 : 
     809           69271 :         if (penalty < bestpenalty)
     810                 :         {
     811           30210 :             bestpenalty = penalty;
     812           30210 :             lowsplit = i;
     813                 :         }
     814                 : 
     815           69271 :         if (penalty <= perfectpenalty)
     816           23374 :             break;
     817                 :     }
     818                 : 
     819           23393 :     final = &state->splits[lowsplit];
     820                 : 
     821                 :     /*
     822                 :      * There is a risk that the "many duplicates" strategy will repeatedly do
     823                 :      * the wrong thing when there are monotonically decreasing insertions to
     824                 :      * the right of a large group of duplicates.   Repeated splits could leave
     825                 :      * a succession of right half pages with free space that can never be
     826                 :      * used.  This must be avoided.
     827                 :      *
     828                 :      * Consider the example of the leftmost page in a single integer attribute
     829                 :      * NULLS FIRST index which is almost filled with NULLs.  Monotonically
     830                 :      * decreasing integer insertions might cause the same leftmost page to
     831                 :      * split repeatedly at the same point.  Each split derives its new high
     832                 :      * key from the lowest current value to the immediate right of the large
     833                 :      * group of NULLs, which will always be higher than all future integer
     834                 :      * insertions, directing all future integer insertions to the same
     835                 :      * leftmost page.
     836                 :      */
     837           23393 :     if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
     838             321 :         !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
     839 UBC           0 :         final->firstrightoff < state->newitemoff + 9)
     840                 :     {
     841                 :         /*
     842                 :          * Avoid the problem by performing a 50:50 split when the new item is
     843                 :          * just to the right of the would-be "many duplicates" split point.
     844                 :          * (Note that the test used for an insert that is "just to the right"
     845                 :          * of the split point is conservative.)
     846                 :          */
     847               0 :         final = &state->splits[0];
     848                 :     }
     849                 : 
     850 CBC       23393 :     *newitemonleft = final->newitemonleft;
     851           23393 :     return final->firstrightoff;
     852                 : }
     853                 : 
     854                 : #define LEAF_SPLIT_DISTANCE         0.050
     855                 : #define INTERNAL_SPLIT_DISTANCE     0.075
     856                 : 
     857                 : /*
     858                 :  * Return a split interval to use for the default strategy.  This is a limit
     859                 :  * on the number of candidate split points to give further consideration to.
     860                 :  * Only a fraction of all candidate splits points (those located at the start
     861                 :  * of the now-sorted splits array) fall within the split interval.  Split
     862                 :  * interval is applied within _bt_bestsplitloc().
     863                 :  *
     864                 :  * Split interval represents an acceptable range of split points -- those that
     865                 :  * have leftfree and rightfree values that are acceptably balanced.  The final
     866                 :  * split point chosen is the split point with the lowest "penalty" among split
     867                 :  * points in this split interval (unless we change our entire strategy, in
     868                 :  * which case the interval also changes -- see _bt_strategy()).
     869                 :  *
     870                 :  * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
     871                 :  * and sigma b for internal ("branch") splits.  It's hard to provide a
     872                 :  * theoretical justification for the size of the split interval, though it's
     873                 :  * clear that a small split interval can make tuples on level L+1 much smaller
     874                 :  * on average, without noticeably affecting space utilization on level L.
     875                 :  * (Note that the way that we calculate split interval might need to change if
     876                 :  * suffix truncation is taught to truncate tuples "within" the last
     877                 :  * attribute/datum for data types like text, which is more or less how it is
     878                 :  * assumed to work in the paper.)
     879                 :  */
     880                 : static int
     881           23393 : _bt_defaultinterval(FindSplitData *state)
     882                 : {
     883                 :     SplitPoint *spaceoptimal;
     884                 :     int16       tolerance,
     885                 :                 lowleftfree,
     886                 :                 lowrightfree,
     887                 :                 highleftfree,
     888                 :                 highrightfree;
     889                 : 
     890                 :     /*
     891                 :      * Determine leftfree and rightfree values that are higher and lower than
     892                 :      * we're willing to tolerate.  Note that the final split interval will be
     893                 :      * about 10% of nsplits in the common case where all non-pivot tuples
     894                 :      * (data items) from a leaf page are uniformly sized.  We're a bit more
     895                 :      * aggressive when splitting internal pages.
     896                 :      */
     897           23393 :     if (state->is_leaf)
     898           23292 :         tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
     899                 :     else
     900             101 :         tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
     901                 : 
     902                 :     /* First candidate split point is the most evenly balanced */
     903           23393 :     spaceoptimal = state->splits;
     904           23393 :     lowleftfree = spaceoptimal->leftfree - tolerance;
     905           23393 :     lowrightfree = spaceoptimal->rightfree - tolerance;
     906           23393 :     highleftfree = spaceoptimal->leftfree + tolerance;
     907           23393 :     highrightfree = spaceoptimal->rightfree + tolerance;
     908                 : 
     909                 :     /*
     910                 :      * Iterate through split points, starting from the split immediately after
     911                 :      * 'spaceoptimal'.  Find the first split point that divides free space so
     912                 :      * unevenly that including it in the split interval would be unacceptable.
     913                 :      */
     914          687983 :     for (int i = 1; i < state->nsplits; i++)
     915                 :     {
     916          687983 :         SplitPoint *split = state->splits + i;
     917                 : 
     918                 :         /* Cannot use curdelta here, since its value is often weighted */
     919          687983 :         if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
     920          665582 :             split->leftfree > highleftfree || split->rightfree > highrightfree)
     921           23393 :             return i;
     922                 :     }
     923                 : 
     924 UBC           0 :     return state->nsplits;
     925                 : }
     926                 : 
     927                 : /*
     928                 :  * Subroutine to decide whether split should use default strategy/initial
     929                 :  * split interval, or whether it should finish splitting the page using
     930                 :  * alternative strategies (this is only possible with leaf pages).
     931                 :  *
     932                 :  * Caller uses alternative strategy (or sticks with default strategy) based
     933                 :  * on how *strategy is set here.  Return value is "perfect penalty", which is
     934                 :  * passed to _bt_bestsplitloc() as a final constraint on how far caller is
     935                 :  * willing to go to avoid appending a heap TID when using the many duplicates
     936                 :  * strategy (it also saves _bt_bestsplitloc() useless cycles).
     937                 :  */
     938                 : static int
     939 CBC       23393 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
     940                 :              SplitPoint *rightpage, FindSplitStrat *strategy)
     941                 : {
     942                 :     IndexTuple  leftmost,
     943                 :                 rightmost;
     944                 :     SplitPoint *leftinterval,
     945                 :                *rightinterval;
     946                 :     int         perfectpenalty;
     947           23393 :     int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
     948                 : 
     949                 :     /* Assume that alternative strategy won't be used for now */
     950           23393 :     *strategy = SPLIT_DEFAULT;
     951                 : 
     952                 :     /*
     953                 :      * Use smallest observed firstright item size for entire page (actually,
     954                 :      * entire imaginary version of page that includes newitem) as perfect
     955                 :      * penalty on internal pages.  This can save cycles in the common case
     956                 :      * where most or all splits (not just splits within interval) have
     957                 :      * firstright tuples that are the same size.
     958                 :      */
     959           23393 :     if (!state->is_leaf)
     960             101 :         return state->minfirstrightsz;
     961                 : 
     962                 :     /*
     963                 :      * Use leftmost and rightmost tuples from leftmost and rightmost splits in
     964                 :      * current split interval
     965                 :      */
     966           23292 :     _bt_interval_edges(state, &leftinterval, &rightinterval);
     967           23292 :     leftmost = _bt_split_lastleft(state, leftinterval);
     968           23292 :     rightmost = _bt_split_firstright(state, rightinterval);
     969                 : 
     970                 :     /*
     971                 :      * If initial split interval can produce a split point that will at least
     972                 :      * avoid appending a heap TID in new high key, we're done.  Finish split
     973                 :      * with default strategy and initial split interval.
     974                 :      */
     975           23292 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     976           23292 :     if (perfectpenalty <= indnkeyatts)
     977           22808 :         return perfectpenalty;
     978                 : 
     979                 :     /*
     980                 :      * Work out how caller should finish split when even their "perfect"
     981                 :      * penalty for initial/default split interval indicates that the interval
     982                 :      * does not contain even a single split that avoids appending a heap TID.
     983                 :      *
     984                 :      * Use the leftmost split's lastleft tuple and the rightmost split's
     985                 :      * firstright tuple to assess every possible split.
     986                 :      */
     987             484 :     leftmost = _bt_split_lastleft(state, leftpage);
     988             484 :     rightmost = _bt_split_firstright(state, rightpage);
     989                 : 
     990                 :     /*
     991                 :      * If page (including new item) has many duplicates but is not entirely
     992                 :      * full of duplicates, a many duplicates strategy split will be performed.
     993                 :      * If page is entirely full of duplicates, a single value strategy split
     994                 :      * will be performed.
     995                 :      */
     996             484 :     perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
     997             484 :     if (perfectpenalty <= indnkeyatts)
     998                 :     {
     999             325 :         *strategy = SPLIT_MANY_DUPLICATES;
    1000                 : 
    1001                 :         /*
    1002                 :          * Many duplicates strategy should split at either side the group of
    1003                 :          * duplicates that enclose the delta-optimal split point.  Return
    1004                 :          * indnkeyatts rather than the true perfect penalty to make that
    1005                 :          * happen.  (If perfectpenalty was returned here then low cardinality
    1006                 :          * composite indexes could have continual unbalanced splits.)
    1007                 :          *
    1008                 :          * Note that caller won't go through with a many duplicates split in
    1009                 :          * rare cases where it looks like there are ever-decreasing insertions
    1010                 :          * to the immediate right of the split point.  This must happen just
    1011                 :          * before a final decision is made, within _bt_bestsplitloc().
    1012                 :          */
    1013             325 :         return indnkeyatts;
    1014                 :     }
    1015                 : 
    1016                 :     /*
    1017                 :      * Single value strategy is only appropriate with ever-increasing heap
    1018                 :      * TIDs; otherwise, original default strategy split should proceed to
    1019                 :      * avoid pathological performance.  Use page high key to infer if this is
    1020                 :      * the rightmost page among pages that store the same duplicate value.
    1021                 :      * This should not prevent insertions of heap TIDs that are slightly out
    1022                 :      * of order from using single value strategy, since that's expected with
    1023                 :      * concurrent inserters of the same duplicate value.
    1024                 :      */
    1025             159 :     else if (state->is_rightmost)
    1026             115 :         *strategy = SPLIT_SINGLE_VALUE;
    1027                 :     else
    1028                 :     {
    1029                 :         ItemId      itemid;
    1030                 :         IndexTuple  hikey;
    1031                 : 
    1032              44 :         itemid = PageGetItemId(state->origpage, P_HIKEY);
    1033              44 :         hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
    1034              44 :         perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
    1035                 :                                              state->newitem);
    1036              44 :         if (perfectpenalty <= indnkeyatts)
    1037              19 :             *strategy = SPLIT_SINGLE_VALUE;
    1038                 :         else
    1039                 :         {
    1040                 :             /*
    1041                 :              * Have caller finish split using default strategy, since page
    1042                 :              * does not appear to be the rightmost page for duplicates of the
    1043                 :              * value the page is filled with
    1044                 :              */
    1045                 :         }
    1046                 :     }
    1047                 : 
    1048             159 :     return perfectpenalty;
    1049                 : }
    1050                 : 
    1051                 : /*
    1052                 :  * Subroutine to locate leftmost and rightmost splits for current/default
    1053                 :  * split interval.  Note that it will be the same split iff there is only one
    1054                 :  * split in interval.
    1055                 :  */
    1056                 : static void
    1057           23292 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
    1058                 :                    SplitPoint **rightinterval)
    1059                 : {
    1060           23292 :     int         highsplit = Min(state->interval, state->nsplits);
    1061                 :     SplitPoint *deltaoptimal;
    1062                 : 
    1063           23292 :     deltaoptimal = state->splits;
    1064           23292 :     *leftinterval = NULL;
    1065           23292 :     *rightinterval = NULL;
    1066                 : 
    1067                 :     /*
    1068                 :      * Delta is an absolute distance to optimal split point, so both the
    1069                 :      * leftmost and rightmost split point will usually be at the end of the
    1070                 :      * array
    1071                 :      */
    1072           46383 :     for (int i = highsplit - 1; i >= 0; i--)
    1073                 :     {
    1074           46383 :         SplitPoint *distant = state->splits + i;
    1075                 : 
    1076           46383 :         if (distant->firstrightoff < deltaoptimal->firstrightoff)
    1077                 :         {
    1078           22933 :             if (*leftinterval == NULL)
    1079           22799 :                 *leftinterval = distant;
    1080                 :         }
    1081           23450 :         else if (distant->firstrightoff > deltaoptimal->firstrightoff)
    1082                 :         {
    1083           22948 :             if (*rightinterval == NULL)
    1084           22828 :                 *rightinterval = distant;
    1085                 :         }
    1086             502 :         else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
    1087                 :         {
    1088                 :             /*
    1089                 :              * "incoming tuple will become firstright" (distant) is to the
    1090                 :              * left of "incoming tuple will become lastleft" (delta-optimal)
    1091                 :              */
    1092 UBC           0 :             Assert(distant->firstrightoff == state->newitemoff);
    1093               0 :             if (*leftinterval == NULL)
    1094               0 :                 *leftinterval = distant;
    1095                 :         }
    1096 CBC         502 :         else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
    1097                 :         {
    1098                 :             /*
    1099                 :              * "incoming tuple will become lastleft" (distant) is to the right
    1100                 :              * of "incoming tuple will become firstright" (delta-optimal)
    1101                 :              */
    1102               7 :             Assert(distant->firstrightoff == state->newitemoff);
    1103               7 :             if (*rightinterval == NULL)
    1104               4 :                 *rightinterval = distant;
    1105                 :         }
    1106                 :         else
    1107                 :         {
    1108                 :             /* There was only one or two splits in initial split interval */
    1109             495 :             Assert(distant == deltaoptimal);
    1110             495 :             if (*leftinterval == NULL)
    1111             493 :                 *leftinterval = distant;
    1112             495 :             if (*rightinterval == NULL)
    1113             460 :                 *rightinterval = distant;
    1114                 :         }
    1115                 : 
    1116           46383 :         if (*leftinterval && *rightinterval)
    1117           23292 :             return;
    1118                 :     }
    1119                 : 
    1120 UBC           0 :     Assert(false);
    1121                 : }
    1122                 : 
    1123                 : /*
    1124                 :  * Subroutine to find penalty for caller's candidate split point.
    1125                 :  *
    1126                 :  * On leaf pages, penalty is the attribute number that distinguishes each side
    1127                 :  * of a split.  It's the last attribute that needs to be included in new high
    1128                 :  * key for left page.  It can be greater than the number of key attributes in
    1129                 :  * cases where a heap TID will need to be appended during truncation.
    1130                 :  *
    1131                 :  * On internal pages, penalty is simply the size of the firstright tuple for
    1132                 :  * the split (including line pointer overhead).  This tuple will become the
    1133                 :  * new high key for the left page.
    1134                 :  */
    1135                 : static inline int
    1136 CBC       69271 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
    1137                 : {
    1138                 :     IndexTuple  lastleft;
    1139                 :     IndexTuple  firstright;
    1140                 : 
    1141           69271 :     if (!state->is_leaf)
    1142                 :     {
    1143                 :         ItemId      itemid;
    1144                 : 
    1145             112 :         if (!split->newitemonleft &&
    1146             112 :             split->firstrightoff == state->newitemoff)
    1147               6 :             return state->newitemsz;
    1148                 : 
    1149             106 :         itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1150                 : 
    1151             106 :         return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
    1152                 :     }
    1153                 : 
    1154           69159 :     lastleft = _bt_split_lastleft(state, split);
    1155           69159 :     firstright = _bt_split_firstright(state, split);
    1156                 : 
    1157           69159 :     return _bt_keep_natts_fast(state->rel, lastleft, firstright);
    1158                 : }
    1159                 : 
    1160                 : /*
    1161                 :  * Subroutine to get a lastleft IndexTuple for a split point
    1162                 :  */
    1163                 : static inline IndexTuple
    1164           92935 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
    1165                 : {
    1166                 :     ItemId      itemid;
    1167                 : 
    1168           92935 :     if (split->newitemonleft && split->firstrightoff == state->newitemoff)
    1169             120 :         return state->newitem;
    1170                 : 
    1171           92815 :     itemid = PageGetItemId(state->origpage,
    1172           92815 :                            OffsetNumberPrev(split->firstrightoff));
    1173           92815 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1174                 : }
    1175                 : 
    1176                 : /*
    1177                 :  * Subroutine to get a firstright IndexTuple for a split point
    1178                 :  */
    1179                 : static inline IndexTuple
    1180           92935 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
    1181                 : {
    1182                 :     ItemId      itemid;
    1183                 : 
    1184           92935 :     if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
    1185              92 :         return state->newitem;
    1186                 : 
    1187           92843 :     itemid = PageGetItemId(state->origpage, split->firstrightoff);
    1188           92843 :     return (IndexTuple) PageGetItem(state->origpage, itemid);
    1189                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a