Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * nbtsplitloc.c
4 : : * Choose split point code for Postgres btree implementation.
5 : : *
6 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : : * Portions Copyright (c) 1994, Regents of the University of California
8 : : *
9 : : *
10 : : * IDENTIFICATION
11 : : * src/backend/access/nbtree/nbtsplitloc.c
12 : : *
13 : : *-------------------------------------------------------------------------
14 : : */
15 : : #include "postgres.h"
16 : :
17 : : #include "access/nbtree.h"
18 : : #include "common/int.h"
19 : :
20 : : typedef enum
21 : : {
22 : : /* strategy for searching through materialized list of split points */
23 : : SPLIT_DEFAULT, /* give some weight to truncation */
24 : : SPLIT_MANY_DUPLICATES, /* find minimally distinguishing point */
25 : : SPLIT_SINGLE_VALUE, /* leave left page almost full */
26 : : } FindSplitStrat;
27 : :
28 : : typedef struct
29 : : {
30 : : /* details of free space left by split */
31 : : int16 curdelta; /* current leftfree/rightfree delta */
32 : : int16 leftfree; /* space left on left page post-split */
33 : : int16 rightfree; /* space left on right page post-split */
34 : :
35 : : /* split point identifying fields (returned by _bt_findsplitloc) */
36 : : OffsetNumber firstrightoff; /* first origpage item on rightpage */
37 : : bool newitemonleft; /* new item goes on left, or right? */
38 : : } SplitPoint;
39 : :
40 : : typedef struct
41 : : {
42 : : /* context data for _bt_recsplitloc */
43 : : Relation rel; /* index relation */
44 : : Page origpage; /* page undergoing split */
45 : : IndexTuple newitem; /* new item (cause of page split) */
46 : : Size newitemsz; /* size of newitem (includes line pointer) */
47 : : bool is_leaf; /* T if splitting a leaf page */
48 : : bool is_rightmost; /* T if splitting rightmost page on level */
49 : : OffsetNumber newitemoff; /* where the new item is to be inserted */
50 : : int leftspace; /* space available for items on left page */
51 : : int rightspace; /* space available for items on right page */
52 : : int olddataitemstotal; /* space taken by old items */
53 : : Size minfirstrightsz; /* smallest firstright size */
54 : :
55 : : /* candidate split point data */
56 : : int maxsplits; /* maximum number of splits */
57 : : int nsplits; /* current number of splits */
58 : : SplitPoint *splits; /* all candidate split points for page */
59 : : int interval; /* current range of acceptable split points */
60 : : } FindSplitData;
61 : :
62 : : static void _bt_recsplitloc(FindSplitData *state,
63 : : OffsetNumber firstrightoff, bool newitemonleft,
64 : : int olddataitemstoleft,
65 : : Size firstrightofforigpagetuplesz);
66 : : static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
67 : : bool usemult);
68 : : static int _bt_splitcmp(const void *arg1, const void *arg2);
69 : : static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
70 : : int leaffillfactor, bool *usemult);
71 : : static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
72 : : static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
73 : : bool *newitemonleft, FindSplitStrat strategy);
74 : : static int _bt_defaultinterval(FindSplitData *state);
75 : : static int _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
76 : : SplitPoint *rightpage, FindSplitStrat *strategy);
77 : : static void _bt_interval_edges(FindSplitData *state,
78 : : SplitPoint **leftinterval, SplitPoint **rightinterval);
79 : : static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
80 : : static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
81 : : SplitPoint *split);
82 : : static inline IndexTuple _bt_split_firstright(FindSplitData *state,
83 : : SplitPoint *split);
84 : :
85 : :
86 : : /*
87 : : * _bt_findsplitloc() -- find an appropriate place to split a page.
88 : : *
89 : : * The main goal here is to equalize the free space that will be on each
90 : : * split page, *after accounting for the inserted tuple*. (If we fail to
91 : : * account for it, we might find ourselves with too little room on the page
92 : : * that it needs to go into!)
93 : : *
94 : : * If the page is the rightmost page on its level, we instead try to arrange
95 : : * to leave the left split page fillfactor% full. In this way, when we are
96 : : * inserting successively increasing keys (consider sequences, timestamps,
97 : : * etc) we will end up with a tree whose pages are about fillfactor% full,
98 : : * instead of the 50% full result that we'd get without this special case.
99 : : * This is the same as nbtsort.c produces for a newly-created tree. Note
100 : : * that leaf and nonleaf pages use different fillfactors. Note also that
101 : : * there are a number of further special cases where fillfactor is not
102 : : * applied in the standard way.
103 : : *
104 : : * We are passed the intended insert position of the new tuple, expressed as
105 : : * the offsetnumber of the tuple it must go in front of (this could be
106 : : * maxoff+1 if the tuple is to go at the end). The new tuple itself is also
107 : : * passed, since it's needed to give some weight to how effective suffix
108 : : * truncation will be. The implementation picks the split point that
109 : : * maximizes the effectiveness of suffix truncation from a small list of
110 : : * alternative candidate split points that leave each side of the split with
111 : : * about the same share of free space. Suffix truncation is secondary to
112 : : * equalizing free space, except in cases with large numbers of duplicates.
113 : : * Note that it is always assumed that caller goes on to perform truncation,
114 : : * even with pg_upgrade'd indexes where that isn't actually the case
115 : : * (!heapkeyspace indexes). See nbtree/README for more information about
116 : : * suffix truncation.
117 : : *
118 : : * We return the index of the first existing tuple that should go on the
119 : : * righthand page (which is called firstrightoff), plus a boolean
120 : : * indicating whether the new tuple goes on the left or right page. You
121 : : * can think of the returned state as a point _between_ two adjacent data
122 : : * items (lastleft and firstright data items) on an imaginary version of
123 : : * origpage that already includes newitem. The bool is necessary to
124 : : * disambiguate the case where firstrightoff == newitemoff (i.e. it is
125 : : * sometimes needed to determine if the firstright tuple for the split is
126 : : * newitem rather than the tuple from origpage at offset firstrightoff).
127 : : */
128 : : OffsetNumber
1852 pg@bowt.ie 129 :CBC 10590 : _bt_findsplitloc(Relation rel,
130 : : Page origpage,
131 : : OffsetNumber newitemoff,
132 : : Size newitemsz,
133 : : IndexTuple newitem,
134 : : bool *newitemonleft)
135 : : {
136 : : BTPageOpaque opaque;
137 : : int leftspace,
138 : : rightspace,
139 : : olddataitemstotal,
140 : : olddataitemstoleft,
141 : : perfectpenalty,
142 : : leaffillfactor;
143 : : FindSplitData state;
144 : : FindSplitStrat strategy;
145 : : ItemId itemid;
146 : : OffsetNumber offnum,
147 : : maxoff,
148 : : firstrightoff;
149 : : double fillfactormult;
150 : : bool usemult;
151 : : SplitPoint leftpage,
152 : : rightpage;
153 : :
744 michael@paquier.xyz 154 : 10590 : opaque = BTPageGetOpaque(origpage);
1462 pg@bowt.ie 155 : 10590 : maxoff = PageGetMaxOffsetNumber(origpage);
156 : :
157 : : /* Total free space available on a btree page, after fixed overhead */
1852 158 : 10590 : leftspace = rightspace =
1462 159 : 10590 : PageGetPageSize(origpage) - SizeOfPageHeaderData -
160 : : MAXALIGN(sizeof(BTPageOpaqueData));
161 : :
162 : : /* The right page will have the same high key as the old page */
1852 163 [ + + ]: 10590 : if (!P_RIGHTMOST(opaque))
164 : : {
1462 165 : 4505 : itemid = PageGetItemId(origpage, P_HIKEY);
1852 166 : 4505 : rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
167 : : sizeof(ItemIdData));
168 : : }
169 : :
170 : : /* Count up total space in data items before actually scanning 'em */
1462 171 : 10590 : olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
1602 michael@paquier.xyz 172 [ + - - + : 10590 : leaffillfactor = BTGetFillFactor(rel);
+ + ]
173 : :
174 : : /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
1852 pg@bowt.ie 175 : 10590 : newitemsz += sizeof(ItemIdData);
176 : 10590 : state.rel = rel;
1462 177 : 10590 : state.origpage = origpage;
1852 178 : 10590 : state.newitem = newitem;
179 : 10590 : state.newitemsz = newitemsz;
180 : 10590 : state.is_leaf = P_ISLEAF(opaque);
181 : 10590 : state.is_rightmost = P_RIGHTMOST(opaque);
182 : 10590 : state.leftspace = leftspace;
183 : 10590 : state.rightspace = rightspace;
184 : 10590 : state.olddataitemstotal = olddataitemstotal;
185 : 10590 : state.minfirstrightsz = SIZE_MAX;
186 : 10590 : state.newitemoff = newitemoff;
187 : :
188 : : /* newitem cannot be a posting list item */
1509 189 [ - + ]: 10590 : Assert(!BTreeTupleIsPosting(newitem));
190 : :
191 : : /*
192 : : * nsplits should never exceed maxoff because there will be at most as
193 : : * many candidate split points as there are points _between_ tuples, once
194 : : * you imagine that the new item is already on the original page (the
195 : : * final number of splits may be slightly lower because not all points
196 : : * between tuples will be legal).
197 : : */
1852 198 : 10590 : state.maxsplits = maxoff;
199 : 10590 : state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
200 : 10590 : state.nsplits = 0;
201 : :
202 : : /*
203 : : * Scan through the data items and calculate space usage for a split at
204 : : * each possible position
205 : : */
206 : 10590 : olddataitemstoleft = 0;
207 : :
208 [ + + ]: 10590 : for (offnum = P_FIRSTDATAKEY(opaque);
209 [ + + ]: 3275633 : offnum <= maxoff;
210 : 3265043 : offnum = OffsetNumberNext(offnum))
211 : : {
212 : : Size itemsz;
213 : :
1462 214 : 3265043 : itemid = PageGetItemId(origpage, offnum);
1852 215 : 3265043 : itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
216 : :
217 : : /*
218 : : * When item offset number is not newitemoff, neither side of the
219 : : * split can be newitem. Record a split after the previous data item
220 : : * from original page, but before the current data item from original
221 : : * page. (_bt_recsplitloc() will reject the split when there are no
222 : : * previous items, which we rely on.)
223 : : */
1796 224 [ + + ]: 3265043 : if (offnum < newitemoff)
1852 225 : 2854129 : _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
1796 226 [ + + ]: 410914 : else if (offnum > newitemoff)
227 : 405255 : _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
228 : : else
229 : : {
230 : : /*
231 : : * Record a split after all "offnum < newitemoff" original page
232 : : * data items, but before newitem
233 : : */
1852 234 : 5659 : _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
235 : :
236 : : /*
237 : : * Record a split after newitem, but before data item from
238 : : * original page at offset newitemoff/current offset
239 : : */
1796 240 : 5659 : _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
241 : : }
242 : :
1852 243 : 3265043 : olddataitemstoleft += itemsz;
244 : : }
245 : :
246 : : /*
247 : : * Record a split after all original page data items, but before newitem.
248 : : * (Though only when it's possible that newitem will end up alone on new
249 : : * right page.)
250 : : */
251 [ - + ]: 10590 : Assert(olddataitemstoleft == olddataitemstotal);
252 [ + + ]: 10590 : if (newitemoff > maxoff)
253 : 4931 : _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);
254 : :
255 : : /*
256 : : * I believe it is not possible to fail to find a feasible split, but just
257 : : * in case ...
258 : : */
259 [ - + ]: 10590 : if (state.nsplits == 0)
1852 pg@bowt.ie 260 [ # # ]:UBC 0 : elog(ERROR, "could not find a feasible split point for index \"%s\"",
261 : : RelationGetRelationName(rel));
262 : :
263 : : /*
264 : : * Start search for a split point among list of legal split points. Give
265 : : * primary consideration to equalizing available free space in each half
266 : : * of the split initially (start with default strategy), while applying
267 : : * rightmost and split-after-new-item optimizations where appropriate.
268 : : * Either of the two other fallback strategies may be required for cases
269 : : * with a large number of duplicates around the original/space-optimal
270 : : * split point.
271 : : *
272 : : * Default strategy gives some weight to suffix truncation in deciding a
273 : : * split point on leaf pages. It attempts to select a split point where a
274 : : * distinguishing attribute appears earlier in the new high key for the
275 : : * left side of the split, in order to maximize the number of trailing
276 : : * attributes that can be truncated away. Only candidate split points
277 : : * that imply an acceptable balance of free space on each side are
278 : : * considered. See _bt_defaultinterval().
279 : : */
1852 pg@bowt.ie 280 [ + + ]:CBC 10590 : if (!state.is_leaf)
281 : : {
282 : : /* fillfactormult only used on rightmost page */
283 : 129 : usemult = state.is_rightmost;
284 : 129 : fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
285 : : }
286 [ + + ]: 10461 : else if (state.is_rightmost)
287 : : {
288 : : /* Rightmost leaf page -- fillfactormult always used */
289 : 6006 : usemult = true;
290 : 6006 : fillfactormult = leaffillfactor / 100.0;
291 : : }
1847 292 [ + + ]: 4455 : else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
293 : : {
294 : : /*
295 : : * New item inserted at rightmost point among a localized grouping on
296 : : * a leaf page -- apply "split after new item" optimization, either by
297 : : * applying leaf fillfactor multiplier, or by choosing the exact split
298 : : * point that leaves newitem as lastleft. (usemult is set for us.)
299 : : */
300 [ + + ]: 360 : if (usemult)
301 : : {
302 : : /* fillfactormult should be set based on leaf fillfactor */
303 : 279 : fillfactormult = leaffillfactor / 100.0;
304 : : }
305 : : else
306 : : {
307 : : /* find precise split point after newitemoff */
308 [ + - ]: 22215 : for (int i = 0; i < state.nsplits; i++)
309 : : {
310 : 22215 : SplitPoint *split = state.splits + i;
311 : :
312 [ + + ]: 22215 : if (split->newitemonleft &&
1462 313 [ + - ]: 81 : newitemoff == split->firstrightoff)
314 : : {
1847 315 : 81 : pfree(state.splits);
316 : 81 : *newitemonleft = true;
317 : 81 : return newitemoff;
318 : : }
319 : : }
320 : :
321 : : /*
322 : : * Cannot legally split after newitemoff; proceed with split
323 : : * without using fillfactor multiplier. This is defensive, and
324 : : * should never be needed in practice.
325 : : */
1847 pg@bowt.ie 326 :UBC 0 : fillfactormult = 0.50;
327 : : }
328 : : }
329 : : else
330 : : {
331 : : /* Other leaf page. 50:50 page split. */
1852 pg@bowt.ie 332 :CBC 4095 : usemult = false;
333 : : /* fillfactormult not used, but be tidy */
334 : 4095 : fillfactormult = 0.50;
335 : : }
336 : :
337 : : /*
338 : : * Save leftmost and rightmost splits for page before original ordinal
339 : : * sort order is lost by delta/fillfactormult sort
340 : : */
341 : 10509 : leftpage = state.splits[0];
342 : 10509 : rightpage = state.splits[state.nsplits - 1];
343 : :
344 : : /* Give split points a fillfactormult-wise delta, and sort on deltas */
345 : 10509 : _bt_deltasortsplits(&state, fillfactormult, usemult);
346 : :
347 : : /* Determine split interval for default strategy */
1454 348 : 10509 : state.interval = _bt_defaultinterval(&state);
349 : :
350 : : /*
351 : : * Determine if default strategy/split interval will produce a
352 : : * sufficiently distinguishing split, or if we should change strategies.
353 : : * Alternative strategies change the range of split points that are
354 : : * considered acceptable (split interval), and possibly change
355 : : * fillfactormult, in order to deal with pages with a large number of
356 : : * duplicates gracefully.
357 : : *
358 : : * Pass low and high splits for the entire page (actually, they're for an
359 : : * imaginary version of the page that includes newitem). These are used
360 : : * when the initial split interval encloses split points that are full of
361 : : * duplicates, and we need to consider if it's even possible to avoid
362 : : * appending a heap TID.
363 : : */
1852 364 : 10509 : perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);
365 : :
366 [ + + ]: 10509 : if (strategy == SPLIT_DEFAULT)
367 : : {
368 : : /*
369 : : * Default strategy worked out (always works out with internal page).
370 : : * Original split interval still stands.
371 : : */
372 : : }
373 : :
374 : : /*
375 : : * Many duplicates strategy is used when a heap TID would otherwise be
376 : : * appended, but the page isn't completely full of logical duplicates.
377 : : *
378 : : * The split interval is widened to include all legal candidate split
379 : : * points. There might be a few as two distinct values in the whole-page
380 : : * split interval, though it's also possible that most of the values on
381 : : * the page are unique. The final split point will either be to the
382 : : * immediate left or to the immediate right of the group of duplicate
383 : : * tuples that enclose the first/delta-optimal split point (perfect
384 : : * penalty was set so that the lowest delta split point that avoids
385 : : * appending a heap TID will be chosen). Maximizing the number of
386 : : * attributes that can be truncated away is not a goal of the many
387 : : * duplicates strategy.
388 : : *
389 : : * Single value strategy is used when it is impossible to avoid appending
390 : : * a heap TID. It arranges to leave the left page very full. This
391 : : * maximizes space utilization in cases where tuples with the same
392 : : * attribute values span many pages. Newly inserted duplicates will tend
393 : : * to have higher heap TID values, so we'll end up splitting to the right
394 : : * consistently. (Single value strategy is harmless though not
395 : : * particularly useful with !heapkeyspace indexes.)
396 : : */
397 [ + + ]: 185 : else if (strategy == SPLIT_MANY_DUPLICATES)
398 : : {
399 [ - + ]: 57 : Assert(state.is_leaf);
400 : : /* Shouldn't try to truncate away extra user attributes */
1735 401 [ - + ]: 57 : Assert(perfectpenalty ==
402 : : IndexRelationGetNumberOfKeyAttributes(state.rel));
403 : : /* No need to resort splits -- no change in fillfactormult/deltas */
1852 404 : 57 : state.interval = state.nsplits;
405 : : }
406 [ + - ]: 128 : else if (strategy == SPLIT_SINGLE_VALUE)
407 : : {
408 [ - + ]: 128 : Assert(state.is_leaf);
409 : : /* Split near the end of the page */
410 : 128 : usemult = true;
411 : 128 : fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
412 : : /* Resort split points with new delta */
413 : 128 : _bt_deltasortsplits(&state, fillfactormult, usemult);
414 : : /* Appending a heap TID is unavoidable, so interval of 1 is fine */
415 : 128 : state.interval = 1;
416 : : }
417 : :
418 : : /*
419 : : * Search among acceptable split points (using final split interval) for
420 : : * the entry that has the lowest penalty, and is therefore expected to
421 : : * maximize fan-out. Sets *newitemonleft for us.
422 : : */
1462 423 : 10509 : firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
424 : : strategy);
1852 425 : 10509 : pfree(state.splits);
426 : :
1462 427 : 10509 : return firstrightoff;
428 : : }
429 : :
430 : : /*
431 : : * Subroutine to record a particular point between two tuples (possibly the
432 : : * new item) on page (ie, combination of firstrightoff and newitemonleft
433 : : * settings) in *state for later analysis. This is also a convenient point to
434 : : * check if the split is legal (if it isn't, it won't be recorded).
435 : : *
436 : : * firstrightoff is the offset of the first item on the original page that
437 : : * goes to the right page, and firstrightofforigpagetuplesz is the size of
438 : : * that tuple. firstrightoff can be > max offset, which means that all the
439 : : * old items go to the left page and only the new item goes to the right page.
440 : : * We don't actually use firstrightofforigpagetuplesz in that case (actually,
441 : : * we don't use it for _any_ split where the firstright tuple happens to be
442 : : * newitem).
443 : : *
444 : : * olddataitemstoleft is the total size of all old items to the left of the
445 : : * split point that is recorded here when legal. Should not include
446 : : * newitemsz, since that is handled here.
447 : : */
448 : : static void
1852 449 : 3275633 : _bt_recsplitloc(FindSplitData *state,
450 : : OffsetNumber firstrightoff,
451 : : bool newitemonleft,
452 : : int olddataitemstoleft,
453 : : Size firstrightofforigpagetuplesz)
454 : : {
455 : : int16 leftfree,
456 : : rightfree;
457 : : Size firstrightsz;
1509 458 : 3275633 : Size postingsz = 0;
459 : : bool newitemisfirstright;
460 : :
461 : : /* Is the new item going to be split point's firstright tuple? */
1462 462 [ + + ]: 3291882 : newitemisfirstright = (firstrightoff == state->newitemoff &&
463 [ + + ]: 16249 : !newitemonleft);
464 : :
465 [ + + ]: 3275633 : if (newitemisfirstright)
466 : 10590 : firstrightsz = state->newitemsz;
467 : : else
468 : : {
469 : 3265043 : firstrightsz = firstrightofforigpagetuplesz;
470 : :
471 : : /*
472 : : * Calculate suffix truncation space saving when firstright tuple is a
473 : : * posting list tuple, though only when the tuple is over 64 bytes
474 : : * including line pointer overhead (arbitrary). This avoids accessing
475 : : * the tuple in cases where its posting list must be very small (if
476 : : * tuple has one at all).
477 : : *
478 : : * Note: We don't do this in the case where firstright tuple is
479 : : * newitem, since newitem cannot have a posting list.
480 : : */
481 [ + + + + ]: 3265043 : if (state->is_leaf && firstrightsz > 64)
482 : : {
483 : : ItemId itemid;
484 : : IndexTuple newhighkey;
485 : :
486 : 20314 : itemid = PageGetItemId(state->origpage, firstrightoff);
487 : 20314 : newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);
488 : :
1509 489 [ + + ]: 20314 : if (BTreeTupleIsPosting(newhighkey))
490 : 19542 : postingsz = IndexTupleSize(newhighkey) -
491 : 9771 : BTreeTupleGetPostingOffset(newhighkey);
492 : : }
493 : : }
494 : :
495 : : /* Account for all the old tuples */
1852 496 : 3275633 : leftfree = state->leftspace - olddataitemstoleft;
497 : 3275633 : rightfree = state->rightspace -
498 : 3275633 : (state->olddataitemstotal - olddataitemstoleft);
499 : :
500 : : /*
501 : : * The first item on the right page becomes the high key of the left page;
502 : : * therefore it counts against left space as well as right space (we
503 : : * cannot assume that suffix truncation will make it any smaller). When
504 : : * index has included attributes, then those attributes of left page high
505 : : * key will be truncated leaving that page with slightly more free space.
506 : : * However, that shouldn't affect our ability to find valid split
507 : : * location, since we err in the direction of being pessimistic about free
508 : : * space on the left half. Besides, even when suffix truncation of
509 : : * non-TID attributes occurs, the new high key often won't even be a
510 : : * single MAXALIGN() quantum smaller than the firstright tuple it's based
511 : : * on.
512 : : *
513 : : * If we are on the leaf level, assume that suffix truncation cannot avoid
514 : : * adding a heap TID to the left half's new high key when splitting at the
515 : : * leaf level. In practice the new high key will often be smaller and
516 : : * will rarely be larger, but conservatively assume the worst case. We do
517 : : * go to the trouble of subtracting away posting list overhead, though
518 : : * only when it looks like it will make an appreciable difference.
519 : : * (Posting lists are the only case where truncation will typically make
520 : : * the final high key far smaller than firstright, so being a bit more
521 : : * precise there noticeably improves the balance of free space.)
522 : : */
523 [ + + ]: 3275633 : if (state->is_leaf)
1462 524 : 3273652 : leftfree -= (int16) (firstrightsz +
1509 525 : 3273652 : MAXALIGN(sizeof(ItemPointerData)) -
526 : : postingsz);
527 : : else
1462 528 : 1981 : leftfree -= (int16) firstrightsz;
529 : :
530 : : /* account for the new item */
1852 531 [ + + ]: 3275633 : if (newitemonleft)
532 : 410914 : leftfree -= (int16) state->newitemsz;
533 : : else
534 : 2864719 : rightfree -= (int16) state->newitemsz;
535 : :
536 : : /*
537 : : * If we are not on the leaf level, we will be able to discard the key
538 : : * data from the first item that winds up on the right page.
539 : : */
540 [ + + ]: 3275633 : if (!state->is_leaf)
1462 541 : 1981 : rightfree += (int16) firstrightsz -
542 : : (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));
543 : :
544 : : /* Record split if legal */
1852 545 [ + + + + ]: 3275633 : if (leftfree >= 0 && rightfree >= 0)
546 : : {
547 [ - + ]: 3255788 : Assert(state->nsplits < state->maxsplits);
548 : :
549 : : /* Determine smallest firstright tuple size among legal splits */
1462 550 : 3255788 : state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);
551 : :
1852 552 : 3255788 : state->splits[state->nsplits].curdelta = 0;
553 : 3255788 : state->splits[state->nsplits].leftfree = leftfree;
554 : 3255788 : state->splits[state->nsplits].rightfree = rightfree;
1462 555 : 3255788 : state->splits[state->nsplits].firstrightoff = firstrightoff;
1852 556 : 3255788 : state->splits[state->nsplits].newitemonleft = newitemonleft;
557 : 3255788 : state->nsplits++;
558 : : }
559 : 3275633 : }
560 : :
561 : : /*
562 : : * Subroutine to assign space deltas to materialized array of candidate split
563 : : * points based on current fillfactor, and to sort array using that fillfactor
564 : : */
565 : : static void
566 : 10637 : _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
567 : : bool usemult)
568 : : {
569 [ + + ]: 3238403 : for (int i = 0; i < state->nsplits; i++)
570 : : {
571 : 3227766 : SplitPoint *split = state->splits + i;
572 : : int16 delta;
573 : :
574 [ + + ]: 3227766 : if (usemult)
575 : 2145223 : delta = fillfactormult * split->leftfree -
576 : 2145223 : (1.0 - fillfactormult) * split->rightfree;
577 : : else
578 : 1082543 : delta = split->leftfree - split->rightfree;
579 : :
580 [ + + ]: 3227766 : if (delta < 0)
581 : 760239 : delta = -delta;
582 : :
583 : : /* Save delta */
584 : 3227766 : split->curdelta = delta;
585 : : }
586 : :
587 : 10637 : qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
588 : 10637 : }
589 : :
590 : : /*
591 : : * qsort-style comparator used by _bt_deltasortsplits()
592 : : */
593 : : static int
594 : 34858702 : _bt_splitcmp(const void *arg1, const void *arg2)
595 : : {
596 : 34858702 : SplitPoint *split1 = (SplitPoint *) arg1;
597 : 34858702 : SplitPoint *split2 = (SplitPoint *) arg2;
598 : :
58 nathan@postgresql.or 599 :GNC 34858702 : return pg_cmp_s16(split1->curdelta, split2->curdelta);
600 : : }
601 : :
602 : : /*
603 : : * Subroutine to determine whether or not a non-rightmost leaf page should be
604 : : * split immediately after the would-be original page offset for the
605 : : * new/incoming tuple (or should have leaf fillfactor applied when new item is
606 : : * to the right on original page). This is appropriate when there is a
607 : : * pattern of localized monotonically increasing insertions into a composite
608 : : * index, where leading attribute values form local groupings, and we
609 : : * anticipate further insertions of the same/current grouping (new item's
610 : : * grouping) in the near future. This can be thought of as a variation on
611 : : * applying leaf fillfactor during rightmost leaf page splits, since cases
612 : : * that benefit will converge on packing leaf pages leaffillfactor% full over
613 : : * time.
614 : : *
615 : : * We may leave extra free space remaining on the rightmost page of a "most
616 : : * significant column" grouping of tuples if that grouping never ends up
617 : : * having future insertions that use the free space. That effect is
618 : : * self-limiting; a future grouping that becomes the "nearest on the right"
619 : : * grouping of the affected grouping usually puts the extra free space to good
620 : : * use.
621 : : *
622 : : * Caller uses optimization when routine returns true, though the exact action
623 : : * taken by caller varies. Caller uses original leaf page fillfactor in
624 : : * standard way rather than using the new item offset directly when *usemult
625 : : * was also set to true here. Otherwise, caller applies optimization by
626 : : * locating the legal split point that makes the new tuple the lastleft tuple
627 : : * for the split.
628 : : */
629 : : static bool
1847 pg@bowt.ie 630 :CBC 4455 : _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
631 : : int leaffillfactor, bool *usemult)
632 : : {
633 : : int16 nkeyatts;
634 : : ItemId itemid;
635 : : IndexTuple tup;
636 : : int keepnatts;
637 : :
638 [ + - - + ]: 4455 : Assert(state->is_leaf && !state->is_rightmost);
639 : :
640 : 4455 : nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
641 : :
642 : : /* Single key indexes not considered here */
643 [ + + ]: 4455 : if (nkeyatts == 1)
644 : 561 : return false;
645 : :
646 : : /* Ascending insertion pattern never inferred when new item is first */
647 [ + + ]: 3894 : if (state->newitemoff == P_FIRSTKEY)
648 : 1 : return false;
649 : :
650 : : /*
651 : : * Only apply optimization on pages with equisized tuples, since ordinal
652 : : * keys are likely to be fixed-width. Testing if the new tuple is
653 : : * variable width directly might also work, but that fails to apply the
654 : : * optimization to indexes with a numeric_ops attribute.
655 : : *
656 : : * Conclude that page has equisized tuples when the new item is the same
657 : : * width as the smallest item observed during pass over page, and other
658 : : * non-pivot tuples must be the same width as well. (Note that the
659 : : * possibly-truncated existing high key isn't counted in
660 : : * olddataitemstotal, and must be subtracted from maxoff.)
661 : : */
662 [ + + ]: 3893 : if (state->newitemsz != state->minfirstrightsz)
663 : 1318 : return false;
664 [ + + ]: 2575 : if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
665 : 1919 : return false;
666 : :
667 : : /*
668 : : * Avoid applying optimization when tuples are wider than a tuple
669 : : * consisting of two non-NULL int8/int64 attributes (or four non-NULL
670 : : * int4/int32 attributes)
671 : : */
672 [ - + ]: 656 : if (state->newitemsz >
673 : : MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
674 : : sizeof(ItemIdData))
1847 pg@bowt.ie 675 :LBC (1) : return false;
676 : :
677 : : /*
678 : : * At least the first attribute's value must be equal to the corresponding
679 : : * value in previous tuple to apply optimization. New item cannot be a
680 : : * duplicate, either.
681 : : *
682 : : * Handle case where new item is to the right of all items on the existing
683 : : * page. This is suggestive of monotonically increasing insertions in
684 : : * itself, so the "heap TID adjacency" test is not applied here.
685 : : */
1847 pg@bowt.ie 686 [ + + ]:CBC 656 : if (state->newitemoff > maxoff)
687 : : {
1462 688 : 255 : itemid = PageGetItemId(state->origpage, maxoff);
689 : 255 : tup = (IndexTuple) PageGetItem(state->origpage, itemid);
1847 690 : 255 : keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
691 : :
692 [ + + + - ]: 255 : if (keepnatts > 1 && keepnatts <= nkeyatts)
693 : : {
694 : 254 : *usemult = true;
695 : 254 : return true;
696 : : }
697 : :
1847 pg@bowt.ie 698 :GBC 1 : return false;
699 : : }
700 : :
701 : : /*
702 : : * "Low cardinality leading column, high cardinality suffix column"
703 : : * indexes with a random insertion pattern (e.g., an index with a boolean
704 : : * column, such as an index on '(book_is_in_print, book_isbn)') present us
705 : : * with a risk of consistently misapplying the optimization. We're
706 : : * willing to accept very occasional misapplication of the optimization,
707 : : * provided the cases where we get it wrong are rare and self-limiting.
708 : : *
709 : : * Heap TID adjacency strongly suggests that the item just to the left was
710 : : * inserted very recently, which limits overapplication of the
711 : : * optimization. Besides, all inappropriate cases triggered here will
712 : : * still split in the middle of the page on average.
713 : : */
1462 pg@bowt.ie 714 :CBC 401 : itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
715 : 401 : tup = (IndexTuple) PageGetItem(state->origpage, itemid);
716 : : /* Do cheaper test first */
1509 717 [ + - ]: 401 : if (BTreeTupleIsPosting(tup) ||
718 [ + + ]: 401 : !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
1847 719 : 243 : return false;
720 : : /* Check same conditions as rightmost item case, too */
721 : 158 : keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
722 : :
723 [ + + + - ]: 158 : if (keepnatts > 1 && keepnatts <= nkeyatts)
724 : : {
725 : 106 : double interp = (double) state->newitemoff / ((double) maxoff + 1);
726 : 106 : double leaffillfactormult = (double) leaffillfactor / 100.0;
727 : :
728 : : /*
729 : : * Don't allow caller to split after a new item when it will result in
730 : : * a split point to the right of the point that a leaf fillfactor
731 : : * split would use -- have caller apply leaf fillfactor instead
732 : : */
733 : 106 : *usemult = interp > leaffillfactormult;
734 : :
735 : 106 : return true;
736 : : }
737 : :
738 : 52 : return false;
739 : : }
740 : :
741 : : /*
742 : : * Subroutine for determining if two heap TIDS are "adjacent".
743 : : *
744 : : * Adjacent means that the high TID is very likely to have been inserted into
745 : : * heap relation immediately after the low TID, probably during the current
746 : : * transaction.
747 : : */
748 : : static bool
749 : 401 : _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
750 : : {
751 : : BlockNumber lowblk,
752 : : highblk;
753 : :
754 : 401 : lowblk = ItemPointerGetBlockNumber(lowhtid);
755 : 401 : highblk = ItemPointerGetBlockNumber(highhtid);
756 : :
757 : : /* Make optimistic assumption of adjacency when heap blocks match */
758 [ + + ]: 401 : if (lowblk == highblk)
759 : 155 : return true;
760 : :
761 : : /* When heap block one up, second offset should be FirstOffsetNumber */
762 [ + + + + ]: 332 : if (lowblk + 1 == highblk &&
763 : 86 : ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
764 : 3 : return true;
765 : :
766 : 243 : return false;
767 : : }
768 : :
769 : : /*
770 : : * Subroutine to find the "best" split point among candidate split points.
771 : : * The best split point is the split point with the lowest penalty among split
772 : : * points that fall within current/final split interval. Penalty is an
773 : : * abstract score, with a definition that varies depending on whether we're
774 : : * splitting a leaf page or an internal page. See _bt_split_penalty() for
775 : : * details.
776 : : *
777 : : * "perfectpenalty" is assumed to be the lowest possible penalty among
778 : : * candidate split points. This allows us to return early without wasting
779 : : * cycles on calculating the first differing attribute for all candidate
780 : : * splits when that clearly cannot improve our choice (or when we only want a
781 : : * minimally distinguishing split point, and don't want to make the split any
782 : : * more unbalanced than is necessary).
783 : : *
784 : : * We return the index of the first existing tuple that should go on the right
785 : : * page, plus a boolean indicating if new item is on left of split point.
786 : : */
787 : : static OffsetNumber
1735 788 : 10509 : _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
789 : : bool *newitemonleft, FindSplitStrat strategy)
790 : : {
791 : : int bestpenalty,
792 : : lowsplit;
1852 793 : 10509 : int highsplit = Min(state->interval, state->nsplits);
794 : : SplitPoint *final;
795 : :
796 : 10509 : bestpenalty = INT_MAX;
797 : 10509 : lowsplit = 0;
798 [ + + ]: 23805 : for (int i = lowsplit; i < highsplit; i++)
799 : : {
800 : : int penalty;
801 : :
802 : 23792 : penalty = _bt_split_penalty(state, state->splits + i);
803 : :
804 [ + + ]: 23792 : if (penalty < bestpenalty)
805 : : {
806 : 12916 : bestpenalty = penalty;
807 : 12916 : lowsplit = i;
808 : : }
809 : :
1460 810 [ + + ]: 23792 : if (penalty <= perfectpenalty)
811 : 10496 : break;
812 : : }
813 : :
1735 814 : 10509 : final = &state->splits[lowsplit];
815 : :
816 : : /*
817 : : * There is a risk that the "many duplicates" strategy will repeatedly do
818 : : * the wrong thing when there are monotonically decreasing insertions to
819 : : * the right of a large group of duplicates. Repeated splits could leave
820 : : * a succession of right half pages with free space that can never be
821 : : * used. This must be avoided.
822 : : *
823 : : * Consider the example of the leftmost page in a single integer attribute
824 : : * NULLS FIRST index which is almost filled with NULLs. Monotonically
825 : : * decreasing integer insertions might cause the same leftmost page to
826 : : * split repeatedly at the same point. Each split derives its new high
827 : : * key from the lowest current value to the immediate right of the large
828 : : * group of NULLs, which will always be higher than all future integer
829 : : * insertions, directing all future integer insertions to the same
830 : : * leftmost page.
831 : : */
832 [ + + + + ]: 10509 : if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
1462 833 [ + + - + ]: 52 : !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
1454 pg@bowt.ie 834 [ # # ]:UBC 0 : final->firstrightoff < state->newitemoff + 9)
835 : : {
836 : : /*
837 : : * Avoid the problem by performing a 50:50 split when the new item is
838 : : * just to the right of the would-be "many duplicates" split point.
839 : : * (Note that the test used for an insert that is "just to the right"
840 : : * of the split point is conservative.)
841 : : */
1735 842 : 0 : final = &state->splits[0];
843 : : }
844 : :
1735 pg@bowt.ie 845 :CBC 10509 : *newitemonleft = final->newitemonleft;
1462 846 : 10509 : return final->firstrightoff;
847 : : }
848 : :
849 : : #define LEAF_SPLIT_DISTANCE 0.050
850 : : #define INTERNAL_SPLIT_DISTANCE 0.075
851 : :
852 : : /*
853 : : * Return a split interval to use for the default strategy. This is a limit
854 : : * on the number of candidate split points to give further consideration to.
855 : : * Only a fraction of all candidate splits points (those located at the start
856 : : * of the now-sorted splits array) fall within the split interval. Split
857 : : * interval is applied within _bt_bestsplitloc().
858 : : *
859 : : * Split interval represents an acceptable range of split points -- those that
860 : : * have leftfree and rightfree values that are acceptably balanced. The final
861 : : * split point chosen is the split point with the lowest "penalty" among split
862 : : * points in this split interval (unless we change our entire strategy, in
863 : : * which case the interval also changes -- see _bt_strategy()).
864 : : *
865 : : * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
866 : : * and sigma b for internal ("branch") splits. It's hard to provide a
867 : : * theoretical justification for the size of the split interval, though it's
868 : : * clear that a small split interval can make tuples on level L+1 much smaller
869 : : * on average, without noticeably affecting space utilization on level L.
870 : : * (Note that the way that we calculate split interval might need to change if
871 : : * suffix truncation is taught to truncate tuples "within" the last
872 : : * attribute/datum for data types like text, which is more or less how it is
873 : : * assumed to work in the paper.)
874 : : */
875 : : static int
1454 876 : 10509 : _bt_defaultinterval(FindSplitData *state)
877 : : {
878 : : SplitPoint *spaceoptimal;
879 : : int16 tolerance,
880 : : lowleftfree,
881 : : lowrightfree,
882 : : highleftfree,
883 : : highrightfree;
884 : :
885 : : /*
886 : : * Determine leftfree and rightfree values that are higher and lower than
887 : : * we're willing to tolerate. Note that the final split interval will be
888 : : * about 10% of nsplits in the common case where all non-pivot tuples
889 : : * (data items) from a leaf page are uniformly sized. We're a bit more
890 : : * aggressive when splitting internal pages.
891 : : */
892 [ + + ]: 10509 : if (state->is_leaf)
893 : 10380 : tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
894 : : else
895 : 129 : tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;
896 : :
897 : : /* First candidate split point is the most evenly balanced */
898 : 10509 : spaceoptimal = state->splits;
899 : 10509 : lowleftfree = spaceoptimal->leftfree - tolerance;
900 : 10509 : lowrightfree = spaceoptimal->rightfree - tolerance;
901 : 10509 : highleftfree = spaceoptimal->leftfree + tolerance;
902 : 10509 : highrightfree = spaceoptimal->rightfree + tolerance;
903 : :
904 : : /*
905 : : * Iterate through split points, starting from the split immediately after
906 : : * 'spaceoptimal'. Find the first split point that divides free space so
907 : : * unevenly that including it in the split interval would be unacceptable.
908 : : */
909 [ + - ]: 324617 : for (int i = 1; i < state->nsplits; i++)
910 : : {
911 : 324617 : SplitPoint *split = state->splits + i;
912 : :
913 : : /* Cannot use curdelta here, since its value is often weighted */
914 [ + + + + ]: 324617 : if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
915 [ + + + + ]: 314522 : split->leftfree > highleftfree || split->rightfree > highrightfree)
916 : 10509 : return i;
917 : : }
918 : :
1454 pg@bowt.ie 919 :UBC 0 : return state->nsplits;
920 : : }
921 : :
922 : : /*
923 : : * Subroutine to decide whether split should use default strategy/initial
924 : : * split interval, or whether it should finish splitting the page using
925 : : * alternative strategies (this is only possible with leaf pages).
926 : : *
927 : : * Caller uses alternative strategy (or sticks with default strategy) based
928 : : * on how *strategy is set here. Return value is "perfect penalty", which is
929 : : * passed to _bt_bestsplitloc() as a final constraint on how far caller is
930 : : * willing to go to avoid appending a heap TID when using the many duplicates
931 : : * strategy (it also saves _bt_bestsplitloc() useless cycles).
932 : : */
933 : : static int
1852 pg@bowt.ie 934 :CBC 10509 : _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
935 : : SplitPoint *rightpage, FindSplitStrat *strategy)
936 : : {
937 : : IndexTuple leftmost,
938 : : rightmost;
939 : : SplitPoint *leftinterval,
940 : : *rightinterval;
941 : : int perfectpenalty;
942 : 10509 : int indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
943 : :
944 : : /* Assume that alternative strategy won't be used for now */
945 : 10509 : *strategy = SPLIT_DEFAULT;
946 : :
947 : : /*
948 : : * Use smallest observed firstright item size for entire page (actually,
949 : : * entire imaginary version of page that includes newitem) as perfect
950 : : * penalty on internal pages. This can save cycles in the common case
951 : : * where most or all splits (not just splits within interval) have
952 : : * firstright tuples that are the same size.
953 : : */
954 [ + + ]: 10509 : if (!state->is_leaf)
955 : 129 : return state->minfirstrightsz;
956 : :
957 : : /*
958 : : * Use leftmost and rightmost tuples from leftmost and rightmost splits in
959 : : * current split interval
960 : : */
961 : 10380 : _bt_interval_edges(state, &leftinterval, &rightinterval);
962 : 10380 : leftmost = _bt_split_lastleft(state, leftinterval);
963 : 10380 : rightmost = _bt_split_firstright(state, rightinterval);
964 : :
965 : : /*
966 : : * If initial split interval can produce a split point that will at least
967 : : * avoid appending a heap TID in new high key, we're done. Finish split
968 : : * with default strategy and initial split interval.
969 : : */
970 : 10380 : perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
971 [ + + ]: 10380 : if (perfectpenalty <= indnkeyatts)
972 : 10163 : return perfectpenalty;
973 : :
974 : : /*
975 : : * Work out how caller should finish split when even their "perfect"
976 : : * penalty for initial/default split interval indicates that the interval
977 : : * does not contain even a single split that avoids appending a heap TID.
978 : : *
979 : : * Use the leftmost split's lastleft tuple and the rightmost split's
980 : : * firstright tuple to assess every possible split.
981 : : */
982 : 217 : leftmost = _bt_split_lastleft(state, leftpage);
983 : 217 : rightmost = _bt_split_firstright(state, rightpage);
984 : :
985 : : /*
986 : : * If page (including new item) has many duplicates but is not entirely
987 : : * full of duplicates, a many duplicates strategy split will be performed.
988 : : * If page is entirely full of duplicates, a single value strategy split
989 : : * will be performed.
990 : : */
991 : 217 : perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
992 [ + + ]: 217 : if (perfectpenalty <= indnkeyatts)
993 : : {
994 : 57 : *strategy = SPLIT_MANY_DUPLICATES;
995 : :
996 : : /*
997 : : * Many duplicates strategy should split at either side the group of
998 : : * duplicates that enclose the delta-optimal split point. Return
999 : : * indnkeyatts rather than the true perfect penalty to make that
1000 : : * happen. (If perfectpenalty was returned here then low cardinality
1001 : : * composite indexes could have continual unbalanced splits.)
1002 : : *
1003 : : * Note that caller won't go through with a many duplicates split in
1004 : : * rare cases where it looks like there are ever-decreasing insertions
1005 : : * to the immediate right of the split point. This must happen just
1006 : : * before a final decision is made, within _bt_bestsplitloc().
1007 : : */
1008 : 57 : return indnkeyatts;
1009 : : }
1010 : :
1011 : : /*
1012 : : * Single value strategy is only appropriate with ever-increasing heap
1013 : : * TIDs; otherwise, original default strategy split should proceed to
1014 : : * avoid pathological performance. Use page high key to infer if this is
1015 : : * the rightmost page among pages that store the same duplicate value.
1016 : : * This should not prevent insertions of heap TIDs that are slightly out
1017 : : * of order from using single value strategy, since that's expected with
1018 : : * concurrent inserters of the same duplicate value.
1019 : : */
1020 [ + + ]: 160 : else if (state->is_rightmost)
1021 : 115 : *strategy = SPLIT_SINGLE_VALUE;
1022 : : else
1023 : : {
1024 : : ItemId itemid;
1025 : : IndexTuple hikey;
1026 : :
1462 1027 : 45 : itemid = PageGetItemId(state->origpage, P_HIKEY);
1028 : 45 : hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
1852 1029 : 45 : perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
1030 : : state->newitem);
1031 [ + + ]: 45 : if (perfectpenalty <= indnkeyatts)
1032 : 13 : *strategy = SPLIT_SINGLE_VALUE;
1033 : : else
1034 : : {
1035 : : /*
1036 : : * Have caller finish split using default strategy, since page
1037 : : * does not appear to be the rightmost page for duplicates of the
1038 : : * value the page is filled with
1039 : : */
1040 : : }
1041 : : }
1042 : :
1043 : 160 : return perfectpenalty;
1044 : : }
1045 : :
1046 : : /*
1047 : : * Subroutine to locate leftmost and rightmost splits for current/default
1048 : : * split interval. Note that it will be the same split iff there is only one
1049 : : * split in interval.
1050 : : */
1051 : : static void
1052 : 10380 : _bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
1053 : : SplitPoint **rightinterval)
1054 : : {
1055 : 10380 : int highsplit = Min(state->interval, state->nsplits);
1056 : : SplitPoint *deltaoptimal;
1057 : :
1058 : 10380 : deltaoptimal = state->splits;
1059 : 10380 : *leftinterval = NULL;
1060 : 10380 : *rightinterval = NULL;
1061 : :
1062 : : /*
1063 : : * Delta is an absolute distance to optimal split point, so both the
1064 : : * leftmost and rightmost split point will usually be at the end of the
1065 : : * array
1066 : : */
1067 [ + - ]: 20848 : for (int i = highsplit - 1; i >= 0; i--)
1068 : : {
1069 : 20848 : SplitPoint *distant = state->splits + i;
1070 : :
1462 1071 [ + + ]: 20848 : if (distant->firstrightoff < deltaoptimal->firstrightoff)
1072 : : {
1852 1073 [ + + ]: 10290 : if (*leftinterval == NULL)
1074 : 10157 : *leftinterval = distant;
1075 : : }
1462 1076 [ + + ]: 10558 : else if (distant->firstrightoff > deltaoptimal->firstrightoff)
1077 : : {
1852 1078 [ + + ]: 10325 : if (*rightinterval == NULL)
1079 : 10189 : *rightinterval = distant;
1080 : : }
1081 [ + + + + ]: 233 : else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
1082 : : {
1083 : : /*
1084 : : * "incoming tuple will become firstright" (distant) is to the
1085 : : * left of "incoming tuple will become lastleft" (delta-optimal)
1086 : : */
1462 pg@bowt.ie 1087 [ - + ]:GBC 2 : Assert(distant->firstrightoff == state->newitemoff);
1852 1088 [ + - ]: 2 : if (*leftinterval == NULL)
1089 : 2 : *leftinterval = distant;
1090 : : }
1852 pg@bowt.ie 1091 [ + + + + ]:CBC 231 : else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
1092 : : {
1093 : : /*
1094 : : * "incoming tuple will become lastleft" (distant) is to the right
1095 : : * of "incoming tuple will become firstright" (delta-optimal)
1096 : : */
1462 1097 [ - + ]: 4 : Assert(distant->firstrightoff == state->newitemoff);
1852 1098 [ + - ]: 4 : if (*rightinterval == NULL)
1099 : 4 : *rightinterval = distant;
1100 : : }
1101 : : else
1102 : : {
1103 : : /* There was only one or two splits in initial split interval */
1104 [ - + ]: 227 : Assert(distant == deltaoptimal);
1105 [ + + ]: 227 : if (*leftinterval == NULL)
1106 : 221 : *leftinterval = distant;
1107 [ + + ]: 227 : if (*rightinterval == NULL)
1108 : 187 : *rightinterval = distant;
1109 : : }
1110 : :
1111 [ + + + + ]: 20848 : if (*leftinterval && *rightinterval)
1112 : 10380 : return;
1113 : : }
1114 : :
1852 pg@bowt.ie 1115 :UBC 0 : Assert(false);
1116 : : }
1117 : :
1118 : : /*
1119 : : * Subroutine to find penalty for caller's candidate split point.
1120 : : *
1121 : : * On leaf pages, penalty is the attribute number that distinguishes each side
1122 : : * of a split. It's the last attribute that needs to be included in new high
1123 : : * key for left page. It can be greater than the number of key attributes in
1124 : : * cases where a heap TID will need to be appended during truncation.
1125 : : *
1126 : : * On internal pages, penalty is simply the size of the firstright tuple for
1127 : : * the split (including line pointer overhead). This tuple will become the
1128 : : * new high key for the left page.
1129 : : */
1130 : : static inline int
1852 pg@bowt.ie 1131 :CBC 23792 : _bt_split_penalty(FindSplitData *state, SplitPoint *split)
1132 : : {
1133 : : IndexTuple lastleft;
1134 : : IndexTuple firstright;
1135 : :
1136 [ + + ]: 23792 : if (!state->is_leaf)
1137 : : {
1138 : : ItemId itemid;
1139 : :
1140 [ + - ]: 137 : if (!split->newitemonleft &&
1462 1141 [ + + ]: 137 : split->firstrightoff == state->newitemoff)
1852 1142 : 12 : return state->newitemsz;
1143 : :
1462 1144 : 125 : itemid = PageGetItemId(state->origpage, split->firstrightoff);
1145 : :
1852 1146 : 125 : return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
1147 : : }
1148 : :
1462 1149 : 23655 : lastleft = _bt_split_lastleft(state, split);
1150 : 23655 : firstright = _bt_split_firstright(state, split);
1151 : :
1152 : 23655 : return _bt_keep_natts_fast(state->rel, lastleft, firstright);
1153 : : }
1154 : :
1155 : : /*
1156 : : * Subroutine to get a lastleft IndexTuple for a split point
1157 : : */
1158 : : static inline IndexTuple
1852 1159 : 34252 : _bt_split_lastleft(FindSplitData *state, SplitPoint *split)
1160 : : {
1161 : : ItemId itemid;
1162 : :
1462 1163 [ + + + + ]: 34252 : if (split->newitemonleft && split->firstrightoff == state->newitemoff)
1852 1164 : 171 : return state->newitem;
1165 : :
1462 1166 : 34081 : itemid = PageGetItemId(state->origpage,
1167 : 34081 : OffsetNumberPrev(split->firstrightoff));
1168 : 34081 : return (IndexTuple) PageGetItem(state->origpage, itemid);
1169 : : }
1170 : :
1171 : : /*
1172 : : * Subroutine to get a firstright IndexTuple for a split point
1173 : : */
1174 : : static inline IndexTuple
1852 1175 : 34252 : _bt_split_firstright(FindSplitData *state, SplitPoint *split)
1176 : : {
1177 : : ItemId itemid;
1178 : :
1462 1179 [ + + + + ]: 34252 : if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
1852 1180 : 97 : return state->newitem;
1181 : :
1462 1182 : 34155 : itemid = PageGetItemId(state->origpage, split->firstrightoff);
1183 : 34155 : return (IndexTuple) PageGetItem(state->origpage, itemid);
1184 : : }
|