/*-------------------------------------------------------------------------
 *
 * nbtsplitloc.c
 *    Choose split point code for Postgres btree implementation.
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/access/nbtree/nbtsplitloc.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/nbtree.h"
#include "storage/lmgr.h"

typedef enum
{
    /* strategy for searching through materialized list of split points */
    SPLIT_DEFAULT,              /* give some weight to truncation */
    SPLIT_MANY_DUPLICATES,      /* find minimally distinguishing point */
    SPLIT_SINGLE_VALUE          /* leave left page almost full */
} FindSplitStrat;

typedef struct
{
    /* details of free space left by split */
    int16       curdelta;       /* current leftfree/rightfree delta */
    int16       leftfree;       /* space left on left page post-split */
    int16       rightfree;      /* space left on right page post-split */

    /* split point identifying fields (returned by _bt_findsplitloc) */
    OffsetNumber firstrightoff; /* first origpage item on rightpage */
    bool        newitemonleft;  /* new item goes on left, or right? */
} SplitPoint;

typedef struct
{
    /* context data for _bt_recsplitloc */
    Relation    rel;            /* index relation */
    Page        origpage;       /* page undergoing split */
    IndexTuple  newitem;        /* new item (cause of page split) */
    Size        newitemsz;      /* size of newitem (includes line pointer) */
    bool        is_leaf;        /* T if splitting a leaf page */
    bool        is_rightmost;   /* T if splitting rightmost page on level */
    OffsetNumber newitemoff;    /* where the new item is to be inserted */
    int         leftspace;      /* space available for items on left page */
    int         rightspace;     /* space available for items on right page */
    int         olddataitemstotal;  /* space taken by old items */
    Size        minfirstrightsz;    /* smallest firstright size */

    /* candidate split point data */
    int         maxsplits;      /* maximum number of splits */
    int         nsplits;        /* current number of splits */
    SplitPoint *splits;         /* all candidate split points for page */
    int         interval;       /* current range of acceptable split points */
} FindSplitData;

static void _bt_recsplitloc(FindSplitData *state,
                            OffsetNumber firstrightoff, bool newitemonleft,
                            int olddataitemstoleft,
                            Size firstrightofforigpagetuplesz);
static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
                                bool usemult);
static int  _bt_splitcmp(const void *arg1, const void *arg2);
static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
                                int leaffillfactor, bool *usemult);
static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
                                     bool *newitemonleft, FindSplitStrat strategy);
static int  _bt_defaultinterval(FindSplitData *state);
static int  _bt_strategy(FindSplitData *state, SplitPoint *leftpage,
                         SplitPoint *rightpage, FindSplitStrat *strategy);
static void _bt_interval_edges(FindSplitData *state,
                               SplitPoint **leftinterval, SplitPoint **rightinterval);
static inline int _bt_split_penalty(FindSplitData *state, SplitPoint *split);
static inline IndexTuple _bt_split_lastleft(FindSplitData *state,
                                            SplitPoint *split);
static inline IndexTuple _bt_split_firstright(FindSplitData *state,
                                              SplitPoint *split);


/*
 * _bt_findsplitloc() -- find an appropriate place to split a page.
 *
 * The main goal here is to equalize the free space that will be on each
 * split page, *after accounting for the inserted tuple*. (If we fail to
 * account for it, we might find ourselves with too little room on the page
 * that it needs to go into!)
 *
 * If the page is the rightmost page on its level, we instead try to arrange
 * to leave the left split page fillfactor% full. In this way, when we are
 * inserting successively increasing keys (consider sequences, timestamps,
 * etc) we will end up with a tree whose pages are about fillfactor% full,
 * instead of the 50% full result that we'd get without this special case.
 * This is the same as nbtsort.c produces for a newly-created tree. Note
 * that leaf and nonleaf pages use different fillfactors. Note also that
 * there are a number of further special cases where fillfactor is not
 * applied in the standard way.
 *
 * We are passed the intended insert position of the new tuple, expressed as
 * the offsetnumber of the tuple it must go in front of (this could be
 * maxoff+1 if the tuple is to go at the end). The new tuple itself is also
 * passed, since it's needed to give some weight to how effective suffix
 * truncation will be. The implementation picks the split point that
 * maximizes the effectiveness of suffix truncation from a small list of
 * alternative candidate split points that leave each side of the split with
 * about the same share of free space. Suffix truncation is secondary to
 * equalizing free space, except in cases with large numbers of duplicates.
 * Note that it is always assumed that caller goes on to perform truncation,
 * even with pg_upgrade'd indexes where that isn't actually the case
 * (!heapkeyspace indexes). See nbtree/README for more information about
 * suffix truncation.
 *
 * We return the index of the first existing tuple that should go on the
 * righthand page (which is called firstrightoff), plus a boolean
 * indicating whether the new tuple goes on the left or right page. You
 * can think of the returned state as a point _between_ two adjacent data
 * items (lastleft and firstright data items) on an imaginary version of
 * origpage that already includes newitem. The bool is necessary to
 * disambiguate the case where firstrightoff == newitemoff (i.e. it is
 * sometimes needed to determine if the firstright tuple for the split is
 * newitem rather than the tuple from origpage at offset firstrightoff).
 */
OffsetNumber
_bt_findsplitloc(Relation rel,
                 Page origpage,
                 OffsetNumber newitemoff,
                 Size newitemsz,
                 IndexTuple newitem,
                 bool *newitemonleft)
{
    BTPageOpaque opaque;
    int         leftspace,
                rightspace,
                olddataitemstotal,
                olddataitemstoleft,
                perfectpenalty,
                leaffillfactor;
    FindSplitData state;
    FindSplitStrat strategy;
    ItemId      itemid;
    OffsetNumber offnum,
                maxoff,
                firstrightoff;
    double      fillfactormult;
    bool        usemult;
    SplitPoint  leftpage,
                rightpage;

    opaque = BTPageGetOpaque(origpage);
    maxoff = PageGetMaxOffsetNumber(origpage);

    /* Total free space available on a btree page, after fixed overhead */
    leftspace = rightspace =
        PageGetPageSize(origpage) - SizeOfPageHeaderData -
        MAXALIGN(sizeof(BTPageOpaqueData));

    /* The right page will have the same high key as the old page */
    if (!P_RIGHTMOST(opaque))
    {
        itemid = PageGetItemId(origpage, P_HIKEY);
        rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
                             sizeof(ItemIdData));
    }
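
    /*
     * Illustrative numbers (an aside, assuming the default 8KB BLCKSZ on a
     * typical 64-bit build): SizeOfPageHeaderData is 24 bytes and the
     * MAXALIGN'd BTPageOpaqueData is 16 bytes, so leftspace and rightspace
     * start out at roughly 8152 bytes before the high key adjustment above.
     */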

    /* Count up total space in data items before actually scanning 'em */
    olddataitemstotal = rightspace - (int) PageGetExactFreeSpace(origpage);
    leaffillfactor = BTGetFillFactor(rel);

    /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
    newitemsz += sizeof(ItemIdData);
    state.rel = rel;
    state.origpage = origpage;
    state.newitem = newitem;
    state.newitemsz = newitemsz;
    state.is_leaf = P_ISLEAF(opaque);
    state.is_rightmost = P_RIGHTMOST(opaque);
    state.leftspace = leftspace;
    state.rightspace = rightspace;
    state.olddataitemstotal = olddataitemstotal;
    state.minfirstrightsz = SIZE_MAX;
    state.newitemoff = newitemoff;

    /* newitem cannot be a posting list item */
    Assert(!BTreeTupleIsPosting(newitem));

    /*
     * nsplits should never exceed maxoff because there will be at most as
     * many candidate split points as there are points _between_ tuples, once
     * you imagine that the new item is already on the original page (the
     * final number of splits may be slightly lower because not all points
     * between tuples will be legal).
     */
    state.maxsplits = maxoff;
    state.splits = palloc(sizeof(SplitPoint) * state.maxsplits);
    state.nsplits = 0;

    /*
     * Scan through the data items and calculate space usage for a split at
     * each possible position
     */
    olddataitemstoleft = 0;

    for (offnum = P_FIRSTDATAKEY(opaque);
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        Size        itemsz;

        itemid = PageGetItemId(origpage, offnum);
        itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);

        /*
         * When item offset number is not newitemoff, neither side of the
         * split can be newitem. Record a split after the previous data item
         * from original page, but before the current data item from original
         * page. (_bt_recsplitloc() will reject the split when there are no
         * previous items, which we rely on.)
         */
        if (offnum < newitemoff)
            _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);
        else if (offnum > newitemoff)
            _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
        else
        {
            /*
             * Record a split after all "offnum < newitemoff" original page
             * data items, but before newitem
             */
            _bt_recsplitloc(&state, offnum, false, olddataitemstoleft, itemsz);

            /*
             * Record a split after newitem, but before data item from
             * original page at offset newitemoff/current offset
             */
            _bt_recsplitloc(&state, offnum, true, olddataitemstoleft, itemsz);
        }

        olddataitemstoleft += itemsz;
    }

    /*
     * Record a split after all original page data items, but before newitem.
     * (Though only when it's possible that newitem will end up alone on new
     * right page.)
     */
    Assert(olddataitemstoleft == olddataitemstotal);
    if (newitemoff > maxoff)
        _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0);

    /*
     * I believe it is not possible to fail to find a feasible split, but just
     * in case ...
     */
    if (state.nsplits == 0)
        elog(ERROR, "could not find a feasible split point for index \"%s\"",
             RelationGetRelationName(rel));

    /*
     * Start search for a split point among list of legal split points. Give
     * primary consideration to equalizing available free space in each half
     * of the split initially (start with default strategy), while applying
     * rightmost and split-after-new-item optimizations where appropriate.
     * Either of the two other fallback strategies may be required for cases
     * with a large number of duplicates around the original/space-optimal
     * split point.
     *
     * Default strategy gives some weight to suffix truncation in deciding a
     * split point on leaf pages. It attempts to select a split point where a
     * distinguishing attribute appears earlier in the new high key for the
     * left side of the split, in order to maximize the number of trailing
     * attributes that can be truncated away. Only candidate split points
     * that imply an acceptable balance of free space on each side are
     * considered. See _bt_defaultinterval().
     */
    if (!state.is_leaf)
    {
        /* fillfactormult only used on rightmost page */
        usemult = state.is_rightmost;
        fillfactormult = BTREE_NONLEAF_FILLFACTOR / 100.0;
    }
    else if (state.is_rightmost)
    {
        /* Rightmost leaf page -- fillfactormult always used */
        usemult = true;
        fillfactormult = leaffillfactor / 100.0;
    }
    else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
    {
        /*
         * New item inserted at rightmost point among a localized grouping on
         * a leaf page -- apply "split after new item" optimization, either by
         * applying leaf fillfactor multiplier, or by choosing the exact split
         * point that leaves newitem as lastleft. (usemult is set for us.)
         */
        if (usemult)
        {
            /* fillfactormult should be set based on leaf fillfactor */
            fillfactormult = leaffillfactor / 100.0;
        }
        else
        {
            /* find precise split point after newitemoff */
            for (int i = 0; i < state.nsplits; i++)
            {
                SplitPoint *split = state.splits + i;

                if (split->newitemonleft &&
                    newitemoff == split->firstrightoff)
                {
                    pfree(state.splits);
                    *newitemonleft = true;
                    return newitemoff;
                }
            }

            /*
             * Cannot legally split after newitemoff; proceed with split
             * without using fillfactor multiplier. This is defensive, and
             * should never be needed in practice.
             */
            fillfactormult = 0.50;
        }
    }
    else
    {
        /* Other leaf page. 50:50 page split. */
        usemult = false;
        /* fillfactormult not used, but be tidy */
        fillfactormult = 0.50;
    }
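
    /*
     * Illustrative aside, assuming stock constants: with the default leaf
     * fillfactor of 90 (BTREE_DEFAULT_FILLFACTOR), fillfactormult is 0.90
     * for rightmost leaf splits, while rightmost internal page splits use
     * BTREE_NONLEAF_FILLFACTOR (70), i.e. 0.70. A usemult split therefore
     * aims to leave the left half about 90% (or 70%) full, rather than
     * splitting 50:50.
     */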

    /*
     * Save leftmost and rightmost splits for page before original ordinal
     * sort order is lost by delta/fillfactormult sort
     */
    leftpage = state.splits[0];
    rightpage = state.splits[state.nsplits - 1];

    /* Give split points a fillfactormult-wise delta, and sort on deltas */
    _bt_deltasortsplits(&state, fillfactormult, usemult);

    /* Determine split interval for default strategy */
    state.interval = _bt_defaultinterval(&state);

    /*
     * Determine if default strategy/split interval will produce a
     * sufficiently distinguishing split, or if we should change strategies.
     * Alternative strategies change the range of split points that are
     * considered acceptable (split interval), and possibly change
     * fillfactormult, in order to deal with pages with a large number of
     * duplicates gracefully.
     *
     * Pass low and high splits for the entire page (actually, they're for an
     * imaginary version of the page that includes newitem). These are used
     * when the initial split interval encloses split points that are full of
     * duplicates, and we need to consider if it's even possible to avoid
     * appending a heap TID.
     */
    perfectpenalty = _bt_strategy(&state, &leftpage, &rightpage, &strategy);

    if (strategy == SPLIT_DEFAULT)
    {
        /*
         * Default strategy worked out (always works out with internal page).
         * Original split interval still stands.
         */
    }

    /*
     * Many duplicates strategy is used when a heap TID would otherwise be
     * appended, but the page isn't completely full of logical duplicates.
     *
     * The split interval is widened to include all legal candidate split
     * points. There might be as few as two distinct values in the whole-page
     * split interval, though it's also possible that most of the values on
     * the page are unique. The final split point will either be to the
     * immediate left or to the immediate right of the group of duplicate
     * tuples that enclose the first/delta-optimal split point (perfect
     * penalty was set so that the lowest delta split point that avoids
     * appending a heap TID will be chosen). Maximizing the number of
     * attributes that can be truncated away is not a goal of the many
     * duplicates strategy.
     *
     * Single value strategy is used when it is impossible to avoid appending
     * a heap TID. It arranges to leave the left page very full. This
     * maximizes space utilization in cases where tuples with the same
     * attribute values span many pages. Newly inserted duplicates will tend
     * to have higher heap TID values, so we'll end up splitting to the right
     * consistently. (Single value strategy is harmless though not
     * particularly useful with !heapkeyspace indexes.)
     */
    else if (strategy == SPLIT_MANY_DUPLICATES)
    {
        Assert(state.is_leaf);
        /* Shouldn't try to truncate away extra user attributes */
        Assert(perfectpenalty ==
               IndexRelationGetNumberOfKeyAttributes(state.rel));
        /* No need to resort splits -- no change in fillfactormult/deltas */
        state.interval = state.nsplits;
    }
    else if (strategy == SPLIT_SINGLE_VALUE)
    {
        Assert(state.is_leaf);
        /* Split near the end of the page */
        usemult = true;
        fillfactormult = BTREE_SINGLEVAL_FILLFACTOR / 100.0;
        /* Resort split points with new delta */
        _bt_deltasortsplits(&state, fillfactormult, usemult);
        /* Appending a heap TID is unavoidable, so interval of 1 is fine */
        state.interval = 1;
    }

    /*
     * Search among acceptable split points (using final split interval) for
     * the entry that has the lowest penalty, and is therefore expected to
     * maximize fan-out. Sets *newitemonleft for us.
     */
    firstrightoff = _bt_bestsplitloc(&state, perfectpenalty, newitemonleft,
                                     strategy);
    pfree(state.splits);

    return firstrightoff;
}
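
/*
 * Example call (a sketch only; assumes the caller is nbtinsert.c's
 * _bt_split(), which is how this code is expected to be reached):
 *
 *      bool            newitemonleft;
 *      OffsetNumber    firstrightoff;
 *
 *      firstrightoff = _bt_findsplitloc(rel, origpage, newitemoff, newitemsz,
 *                                       newitem, &newitemonleft);
 */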

/*
 * Subroutine to record a particular point between two tuples (possibly the
 * new item) on page (ie, combination of firstrightoff and newitemonleft
 * settings) in *state for later analysis. This is also a convenient point to
 * check if the split is legal (if it isn't, it won't be recorded).
 *
 * firstrightoff is the offset of the first item on the original page that
 * goes to the right page, and firstrightofforigpagetuplesz is the size of
 * that tuple. firstrightoff can be > max offset, which means that all the
 * old items go to the left page and only the new item goes to the right page.
 * We don't actually use firstrightofforigpagetuplesz in that case (actually,
 * we don't use it for _any_ split where the firstright tuple happens to be
 * newitem).
 *
 * olddataitemstoleft is the total size of all old items to the left of the
 * split point that is recorded here when legal. Should not include
 * newitemsz, since that is handled here.
 */
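/*
 * Worked example with hypothetical numbers (not taken from the code): on an
 * 8KB leaf page with leftspace = rightspace = 8152, olddataitemstotal = 8000,
 * olddataitemstoleft = 4000, firstrightsz = 20 and newitemonleft = true:
 *
 *      leftfree  = 8152 - 4000 - (20 + 8) - newitemsz
 *      rightfree = 8152 - (8000 - 4000)
 *
 * The (20 + 8) term is the firstright tuple becoming the left page's new high
 * key, pessimistically assuming a MAXALIGN'd heap TID gets appended to it.
 */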
static void
_bt_recsplitloc(FindSplitData *state,
                OffsetNumber firstrightoff,
                bool newitemonleft,
                int olddataitemstoleft,
                Size firstrightofforigpagetuplesz)
{
    int16       leftfree,
                rightfree;
    Size        firstrightsz;
    Size        postingsz = 0;
    bool        newitemisfirstright;

    /* Is the new item going to be split point's firstright tuple? */
    newitemisfirstright = (firstrightoff == state->newitemoff &&
                           !newitemonleft);

    if (newitemisfirstright)
        firstrightsz = state->newitemsz;
    else
    {
        firstrightsz = firstrightofforigpagetuplesz;

        /*
         * Calculate suffix truncation space saving when firstright tuple is a
         * posting list tuple, though only when the tuple is over 64 bytes
         * including line pointer overhead (arbitrary). This avoids accessing
         * the tuple in cases where its posting list must be very small (if
         * tuple has one at all).
         *
         * Note: We don't do this in the case where firstright tuple is
         * newitem, since newitem cannot have a posting list.
         */
        if (state->is_leaf && firstrightsz > 64)
        {
            ItemId      itemid;
            IndexTuple  newhighkey;

            itemid = PageGetItemId(state->origpage, firstrightoff);
            newhighkey = (IndexTuple) PageGetItem(state->origpage, itemid);

            if (BTreeTupleIsPosting(newhighkey))
                postingsz = IndexTupleSize(newhighkey) -
                    BTreeTupleGetPostingOffset(newhighkey);
        }
    }

    /* Account for all the old tuples */
    leftfree = state->leftspace - olddataitemstoleft;
    rightfree = state->rightspace -
        (state->olddataitemstotal - olddataitemstoleft);

    /*
     * The first item on the right page becomes the high key of the left page;
     * therefore it counts against left space as well as right space (we
     * cannot assume that suffix truncation will make it any smaller). When
     * index has included attributes, then those attributes of left page high
     * key will be truncated leaving that page with slightly more free space.
     * However, that shouldn't affect our ability to find valid split
     * location, since we err in the direction of being pessimistic about free
     * space on the left half. Besides, even when suffix truncation of
     * non-TID attributes occurs, the new high key often won't even be a
     * single MAXALIGN() quantum smaller than the firstright tuple it's based
     * on.
     *
     * If we are on the leaf level, assume that suffix truncation cannot avoid
     * adding a heap TID to the left half's new high key when splitting at the
     * leaf level. In practice the new high key will often be smaller and
     * will rarely be larger, but conservatively assume the worst case. We do
     * go to the trouble of subtracting away posting list overhead, though
     * only when it looks like it will make an appreciable difference.
     * (Posting lists are the only case where truncation will typically make
     * the final high key far smaller than firstright, so being a bit more
     * precise there noticeably improves the balance of free space.)
     */
    if (state->is_leaf)
        leftfree -= (int16) (firstrightsz +
                             MAXALIGN(sizeof(ItemPointerData)) -
                             postingsz);
    else
        leftfree -= (int16) firstrightsz;

    /* account for the new item */
    if (newitemonleft)
        leftfree -= (int16) state->newitemsz;
    else
        rightfree -= (int16) state->newitemsz;

    /*
     * If we are not on the leaf level, we will be able to discard the key
     * data from the first item that winds up on the right page.
     */
    if (!state->is_leaf)
        rightfree += (int16) firstrightsz -
            (int16) (MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData));

    /* Record split if legal */
    if (leftfree >= 0 && rightfree >= 0)
    {
        Assert(state->nsplits < state->maxsplits);

        /* Determine smallest firstright tuple size among legal splits */
        state->minfirstrightsz = Min(state->minfirstrightsz, firstrightsz);

        state->splits[state->nsplits].curdelta = 0;
        state->splits[state->nsplits].leftfree = leftfree;
        state->splits[state->nsplits].rightfree = rightfree;
        state->splits[state->nsplits].firstrightoff = firstrightoff;
        state->splits[state->nsplits].newitemonleft = newitemonleft;
        state->nsplits++;
    }
}

/*
 * Subroutine to assign space deltas to materialized array of candidate split
 * points based on current fillfactor, and to sort array using that fillfactor
 */
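/*
 * For instance (hypothetical numbers): a split with leftfree = 400 and
 * rightfree = 4000 gets delta |400 - 4000| = 3600 when usemult is false,
 * but |0.9 * 400 - 0.1 * 4000| = 40 when usemult is true with
 * fillfactormult = 0.90. Fillfactor-weighted deltas therefore rank very
 * lopsided splits as "best" when that is what the caller asked for.
 */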
static void
_bt_deltasortsplits(FindSplitData *state, double fillfactormult,
                    bool usemult)
{
    for (int i = 0; i < state->nsplits; i++)
    {
        SplitPoint *split = state->splits + i;
        int16       delta;

        if (usemult)
            delta = fillfactormult * split->leftfree -
                (1.0 - fillfactormult) * split->rightfree;
        else
            delta = split->leftfree - split->rightfree;

        if (delta < 0)
            delta = -delta;

        /* Save delta */
        split->curdelta = delta;
    }

    qsort(state->splits, state->nsplits, sizeof(SplitPoint), _bt_splitcmp);
}

/*
 * qsort-style comparator used by _bt_deltasortsplits()
 */
static int
_bt_splitcmp(const void *arg1, const void *arg2)
{
    SplitPoint *split1 = (SplitPoint *) arg1;
    SplitPoint *split2 = (SplitPoint *) arg2;

    if (split1->curdelta > split2->curdelta)
        return 1;
    if (split1->curdelta < split2->curdelta)
        return -1;

    return 0;
}

/*
 * Subroutine to determine whether or not a non-rightmost leaf page should be
 * split immediately after the would-be original page offset for the
 * new/incoming tuple (or should have leaf fillfactor applied when new item is
 * to the right on original page). This is appropriate when there is a
 * pattern of localized monotonically increasing insertions into a composite
 * index, where leading attribute values form local groupings, and we
 * anticipate further insertions of the same/current grouping (new item's
 * grouping) in the near future. This can be thought of as a variation on
 * applying leaf fillfactor during rightmost leaf page splits, since cases
 * that benefit will converge on packing leaf pages leaffillfactor% full over
 * time.
 *
 * We may leave extra free space remaining on the rightmost page of a "most
 * significant column" grouping of tuples if that grouping never ends up
 * having future insertions that use the free space. That effect is
 * self-limiting; a future grouping that becomes the "nearest on the right"
 * grouping of the affected grouping usually puts the extra free space to good
 * use.
 *
 * Caller uses optimization when routine returns true, though the exact action
 * taken by caller varies. Caller uses original leaf page fillfactor in
 * standard way rather than using the new item offset directly when *usemult
 * was also set to true here. Otherwise, caller applies optimization by
 * locating the legal split point that makes the new tuple the lastleft tuple
 * for the split.
 */
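/*
 * A hypothetical motivating case: an index on (order_id, item_seq) where
 * each order's rows arrive together in ascending item_seq order. Splitting
 * just after the incoming tuple (or at leaffillfactor) leaves free space
 * where the current grouping's future insertions will land, instead of
 * wasting half of every split page on groupings that are already complete.
 */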
static bool
_bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
                    int leaffillfactor, bool *usemult)
{
    int16       nkeyatts;
    ItemId      itemid;
    IndexTuple  tup;
    int         keepnatts;

    Assert(state->is_leaf && !state->is_rightmost);

    nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);

    /* Single key indexes not considered here */
    if (nkeyatts == 1)
        return false;

    /* Ascending insertion pattern never inferred when new item is first */
    if (state->newitemoff == P_FIRSTKEY)
        return false;

    /*
     * Only apply optimization on pages with equisized tuples, since ordinal
     * keys are likely to be fixed-width. Testing if the new tuple is
     * variable width directly might also work, but that fails to apply the
     * optimization to indexes with a numeric_ops attribute.
     *
     * Conclude that page has equisized tuples when the new item is the same
     * width as the smallest item observed during pass over page, and other
     * non-pivot tuples must be the same width as well. (Note that the
     * possibly-truncated existing high key isn't counted in
     * olddataitemstotal, and must be subtracted from maxoff.)
     */
    if (state->newitemsz != state->minfirstrightsz)
        return false;
    if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
        return false;

    /*
     * Avoid applying optimization when tuples are wider than a tuple
     * consisting of two non-NULL int8/int64 attributes (or four non-NULL
     * int4/int32 attributes)
     */
    if (state->newitemsz >
        MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
        sizeof(ItemIdData))
        return false;

    /*
     * At least the first attribute's value must be equal to the corresponding
     * value in previous tuple to apply optimization. New item cannot be a
     * duplicate, either.
     *
     * Handle case where new item is to the right of all items on the existing
     * page. This is suggestive of monotonically increasing insertions in
     * itself, so the "heap TID adjacency" test is not applied here.
     */
    if (state->newitemoff > maxoff)
    {
        itemid = PageGetItemId(state->origpage, maxoff);
        tup = (IndexTuple) PageGetItem(state->origpage, itemid);
        keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);

        if (keepnatts > 1 && keepnatts <= nkeyatts)
        {
            *usemult = true;
            return true;
        }

        return false;
    }

    /*
     * "Low cardinality leading column, high cardinality suffix column"
     * indexes with a random insertion pattern (e.g., an index with a boolean
     * column, such as an index on '(book_is_in_print, book_isbn)') present us
     * with a risk of consistently misapplying the optimization. We're
     * willing to accept very occasional misapplication of the optimization,
     * provided the cases where we get it wrong are rare and self-limiting.
     *
     * Heap TID adjacency strongly suggests that the item just to the left was
     * inserted very recently, which limits overapplication of the
     * optimization. Besides, all inappropriate cases triggered here will
     * still split in the middle of the page on average.
     */
    itemid = PageGetItemId(state->origpage, OffsetNumberPrev(state->newitemoff));
    tup = (IndexTuple) PageGetItem(state->origpage, itemid);
    /* Do cheaper test first */
    if (BTreeTupleIsPosting(tup) ||
        !_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
        return false;
    /* Check same conditions as rightmost item case, too */
    keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);

    if (keepnatts > 1 && keepnatts <= nkeyatts)
    {
        double      interp = (double) state->newitemoff / ((double) maxoff + 1);
        double      leaffillfactormult = (double) leaffillfactor / 100.0;

        /*
         * Don't allow caller to split after a new item when it will result in
         * a split point to the right of the point that a leaf fillfactor
         * split would use -- have caller apply leaf fillfactor instead
         */
        *usemult = interp > leaffillfactormult;

        return true;
    }

    return false;
}

/*
 * Subroutine for determining if two heap TIDs are "adjacent".
 *
 * Adjacent means that the high TID is very likely to have been inserted into
 * heap relation immediately after the low TID, probably during the current
 * transaction.
 */
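/*
 * For example (hypothetical TIDs): (12,40) and (12,41) are adjacent (same
 * heap block), and so are (12,40) and (13,1) (next block, first offset);
 * (12,40) and (13,2) are not, and neither are (12,40) and (14,1).
 */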
static bool
_bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
{
    BlockNumber lowblk,
                highblk;

    lowblk = ItemPointerGetBlockNumber(lowhtid);
    highblk = ItemPointerGetBlockNumber(highhtid);

    /* Make optimistic assumption of adjacency when heap blocks match */
    if (lowblk == highblk)
        return true;

    /* When heap block one up, second offset should be FirstOffsetNumber */
    if (lowblk + 1 == highblk &&
        ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
        return true;

    return false;
}

/*
 * Subroutine to find the "best" split point among candidate split points.
 * The best split point is the split point with the lowest penalty among split
 * points that fall within current/final split interval. Penalty is an
 * abstract score, with a definition that varies depending on whether we're
 * splitting a leaf page or an internal page. See _bt_split_penalty() for
 * details.
 *
 * "perfectpenalty" is assumed to be the lowest possible penalty among
 * candidate split points. This allows us to return early without wasting
 * cycles on calculating the first differing attribute for all candidate
 * splits when that clearly cannot improve our choice (or when we only want a
 * minimally distinguishing split point, and don't want to make the split any
 * more unbalanced than is necessary).
 *
 * We return the index of the first existing tuple that should go on the right
 * page, plus a boolean indicating if new item is on left of split point.
 */
static OffsetNumber
_bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
                 bool *newitemonleft, FindSplitStrat strategy)
{
    int         bestpenalty,
                lowsplit;
    int         highsplit = Min(state->interval, state->nsplits);
    SplitPoint *final;

    bestpenalty = INT_MAX;
    lowsplit = 0;
    for (int i = lowsplit; i < highsplit; i++)
    {
        int         penalty;

        penalty = _bt_split_penalty(state, state->splits + i);

        if (penalty < bestpenalty)
        {
            bestpenalty = penalty;
            lowsplit = i;
        }

        if (penalty <= perfectpenalty)
            break;
    }

    final = &state->splits[lowsplit];

    /*
     * There is a risk that the "many duplicates" strategy will repeatedly do
     * the wrong thing when there are monotonically decreasing insertions to
     * the right of a large group of duplicates. Repeated splits could leave
     * a succession of right half pages with free space that can never be
     * used. This must be avoided.
     *
     * Consider the example of the leftmost page in a single integer attribute
     * NULLS FIRST index which is almost filled with NULLs. Monotonically
     * decreasing integer insertions might cause the same leftmost page to
     * split repeatedly at the same point. Each split derives its new high
     * key from the lowest current value to the immediate right of the large
     * group of NULLs, which will always be higher than all future integer
     * insertions, directing all future integer insertions to the same
     * leftmost page.
     */
    if (strategy == SPLIT_MANY_DUPLICATES && !state->is_rightmost &&
        !final->newitemonleft && final->firstrightoff >= state->newitemoff &&
        final->firstrightoff < state->newitemoff + 9)
    {
        /*
         * Avoid the problem by performing a 50:50 split when the new item is
         * just to the right of the would-be "many duplicates" split point.
         * (Note that the test used for an insert that is "just to the right"
         * of the split point is conservative.)
         */
        final = &state->splits[0];
    }

    *newitemonleft = final->newitemonleft;
    return final->firstrightoff;
}

#define LEAF_SPLIT_DISTANCE         0.050
#define INTERNAL_SPLIT_DISTANCE     0.075

/*
 * Return a split interval to use for the default strategy. This is a limit
 * on the number of candidate split points to give further consideration to.
 * Only a fraction of all candidate split points (those located at the start
 * of the now-sorted splits array) fall within the split interval. Split
 * interval is applied within _bt_bestsplitloc().
 *
 * Split interval represents an acceptable range of split points -- those that
 * have leftfree and rightfree values that are acceptably balanced. The final
 * split point chosen is the split point with the lowest "penalty" among split
 * points in this split interval (unless we change our entire strategy, in
 * which case the interval also changes -- see _bt_strategy()).
 *
 * The "Prefix B-Trees" paper calls split interval sigma l for leaf splits,
 * and sigma b for internal ("branch") splits. It's hard to provide a
 * theoretical justification for the size of the split interval, though it's
 * clear that a small split interval can make tuples on level L+1 much smaller
 * on average, without noticeably affecting space utilization on level L.
 * (Note that the way that we calculate split interval might need to change if
 * suffix truncation is taught to truncate tuples "within" the last
 * attribute/datum for data types like text, which is more or less how it is
 * assumed to work in the paper.)
 */
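/*
 * Rough worked example (hypothetical numbers): a leaf page holding ~8000
 * bytes of equisized 20-byte data items has tolerance = 8000 * 0.05 = 400
 * bytes. Each step away from the most balanced split point shifts one
 * 20-byte item between halves, so about 20 split points on either side stay
 * within tolerance -- roughly 10% of all candidates, as noted above.
 */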
static int
_bt_defaultinterval(FindSplitData *state)
{
    SplitPoint *spaceoptimal;
    int16       tolerance,
                lowleftfree,
                lowrightfree,
                highleftfree,
                highrightfree;

    /*
     * Determine leftfree and rightfree values that are higher and lower than
     * we're willing to tolerate. Note that the final split interval will be
     * about 10% of nsplits in the common case where all non-pivot tuples
     * (data items) from a leaf page are uniformly sized. We're a bit more
     * aggressive when splitting internal pages.
     */
    if (state->is_leaf)
        tolerance = state->olddataitemstotal * LEAF_SPLIT_DISTANCE;
    else
        tolerance = state->olddataitemstotal * INTERNAL_SPLIT_DISTANCE;

    /* First candidate split point is the most evenly balanced */
    spaceoptimal = state->splits;
    lowleftfree = spaceoptimal->leftfree - tolerance;
    lowrightfree = spaceoptimal->rightfree - tolerance;
    highleftfree = spaceoptimal->leftfree + tolerance;
    highrightfree = spaceoptimal->rightfree + tolerance;

    /*
     * Iterate through split points, starting from the split immediately after
     * 'spaceoptimal'. Find the first split point that divides free space so
     * unevenly that including it in the split interval would be unacceptable.
     */
    for (int i = 1; i < state->nsplits; i++)
    {
        SplitPoint *split = state->splits + i;

        /* Cannot use curdelta here, since its value is often weighted */
        if (split->leftfree < lowleftfree || split->rightfree < lowrightfree ||
            split->leftfree > highleftfree || split->rightfree > highrightfree)
            return i;
    }

    return state->nsplits;
}

/*
 * Subroutine to decide whether split should use default strategy/initial
 * split interval, or whether it should finish splitting the page using
 * alternative strategies (this is only possible with leaf pages).
 *
 * Caller uses alternative strategy (or sticks with default strategy) based
 * on how *strategy is set here. Return value is "perfect penalty", which is
 * passed to _bt_bestsplitloc() as a final constraint on how far caller is
 * willing to go to avoid appending a heap TID when using the many duplicates
 * strategy (it also saves _bt_bestsplitloc() useless cycles).
 */
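/*
 * Informal summary of the possible outcomes below (a sketch, not
 * authoritative):
 *
 *  internal page                  -> SPLIT_DEFAULT, perfect = minfirstrightsz
 *  interval can avoid heap TID    -> SPLIT_DEFAULT, perfect = keep natts
 *  page not wholly duplicates     -> SPLIT_MANY_DUPLICATES, perfect = indnkeyatts
 *  all duplicates, rightmost      -> SPLIT_SINGLE_VALUE
 *  all duplicates, not rightmost  -> SPLIT_SINGLE_VALUE or SPLIT_DEFAULT,
 *                                    depending on high key comparison
 */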
static int
_bt_strategy(FindSplitData *state, SplitPoint *leftpage,
             SplitPoint *rightpage, FindSplitStrat *strategy)
{
    IndexTuple  leftmost,
                rightmost;
    SplitPoint *leftinterval,
               *rightinterval;
    int         perfectpenalty;
    int         indnkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);

    /* Assume that alternative strategy won't be used for now */
    *strategy = SPLIT_DEFAULT;

    /*
     * Use smallest observed firstright item size for entire page (actually,
     * entire imaginary version of page that includes newitem) as perfect
     * penalty on internal pages. This can save cycles in the common case
     * where most or all splits (not just splits within interval) have
     * firstright tuples that are the same size.
     */
    if (!state->is_leaf)
        return state->minfirstrightsz;

    /*
     * Use leftmost and rightmost tuples from leftmost and rightmost splits in
     * current split interval
     */
    _bt_interval_edges(state, &leftinterval, &rightinterval);
    leftmost = _bt_split_lastleft(state, leftinterval);
    rightmost = _bt_split_firstright(state, rightinterval);

    /*
     * If initial split interval can produce a split point that will at least
     * avoid appending a heap TID in new high key, we're done. Finish split
     * with default strategy and initial split interval.
     */
    perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
    if (perfectpenalty <= indnkeyatts)
        return perfectpenalty;

    /*
     * Work out how caller should finish split when even their "perfect"
     * penalty for initial/default split interval indicates that the interval
     * does not contain even a single split that avoids appending a heap TID.
     *
     * Use the leftmost split's lastleft tuple and the rightmost split's
     * firstright tuple to assess every possible split.
     */
    leftmost = _bt_split_lastleft(state, leftpage);
    rightmost = _bt_split_firstright(state, rightpage);

    /*
     * If page (including new item) has many duplicates but is not entirely
     * full of duplicates, a many duplicates strategy split will be performed.
     * If page is entirely full of duplicates, a single value strategy split
     * will be performed.
     */
    perfectpenalty = _bt_keep_natts_fast(state->rel, leftmost, rightmost);
    if (perfectpenalty <= indnkeyatts)
    {
        *strategy = SPLIT_MANY_DUPLICATES;

        /*
         * Many duplicates strategy should split at either side of the group of
         * duplicates that enclose the delta-optimal split point. Return
         * indnkeyatts rather than the true perfect penalty to make that
         * happen. (If perfectpenalty was returned here then low cardinality
         * composite indexes could have continual unbalanced splits.)
         *
         * Note that caller won't go through with a many duplicates split in
         * rare cases where it looks like there are ever-decreasing insertions
         * to the immediate right of the split point. This must happen just
         * before a final decision is made, within _bt_bestsplitloc().
         */
        return indnkeyatts;
    }

    /*
     * Single value strategy is only appropriate with ever-increasing heap
     * TIDs; otherwise, original default strategy split should proceed to
     * avoid pathological performance. Use page high key to infer if this is
     * the rightmost page among pages that store the same duplicate value.
     * This should not prevent insertions of heap TIDs that are slightly out
     * of order from using single value strategy, since that's expected with
     * concurrent inserters of the same duplicate value.
     */
    else if (state->is_rightmost)
        *strategy = SPLIT_SINGLE_VALUE;
    else
    {
        ItemId      itemid;
        IndexTuple  hikey;

        itemid = PageGetItemId(state->origpage, P_HIKEY);
        hikey = (IndexTuple) PageGetItem(state->origpage, itemid);
        perfectpenalty = _bt_keep_natts_fast(state->rel, hikey,
                                             state->newitem);
        if (perfectpenalty <= indnkeyatts)
            *strategy = SPLIT_SINGLE_VALUE;
        else
        {
            /*
             * Have caller finish split using default strategy, since page
             * does not appear to be the rightmost page for duplicates of the
             * value the page is filled with
             */
        }
    }

    return perfectpenalty;
}

/*
 * Subroutine to locate leftmost and rightmost splits for current/default
 * split interval. Note that it will be the same split iff there is only one
 * split in interval.
 */
static void
_bt_interval_edges(FindSplitData *state, SplitPoint **leftinterval,
                   SplitPoint **rightinterval)
{
    int         highsplit = Min(state->interval, state->nsplits);
    SplitPoint *deltaoptimal;

    deltaoptimal = state->splits;
    *leftinterval = NULL;
    *rightinterval = NULL;

    /*
     * Delta is an absolute distance to optimal split point, so both the
     * leftmost and rightmost split point will usually be at the end of the
     * array
     */
    for (int i = highsplit - 1; i >= 0; i--)
    {
        SplitPoint *distant = state->splits + i;

        if (distant->firstrightoff < deltaoptimal->firstrightoff)
        {
            if (*leftinterval == NULL)
                *leftinterval = distant;
        }
        else if (distant->firstrightoff > deltaoptimal->firstrightoff)
        {
            if (*rightinterval == NULL)
                *rightinterval = distant;
        }
        else if (!distant->newitemonleft && deltaoptimal->newitemonleft)
        {
            /*
             * "incoming tuple will become firstright" (distant) is to the
             * left of "incoming tuple will become lastleft" (delta-optimal)
             */
            Assert(distant->firstrightoff == state->newitemoff);
            if (*leftinterval == NULL)
                *leftinterval = distant;
        }
        else if (distant->newitemonleft && !deltaoptimal->newitemonleft)
        {
            /*
             * "incoming tuple will become lastleft" (distant) is to the right
             * of "incoming tuple will become firstright" (delta-optimal)
             */
            Assert(distant->firstrightoff == state->newitemoff);
            if (*rightinterval == NULL)
                *rightinterval = distant;
        }
        else
        {
            /* There was only one or two splits in initial split interval */
            Assert(distant == deltaoptimal);
            if (*leftinterval == NULL)
                *leftinterval = distant;
            if (*rightinterval == NULL)
                *rightinterval = distant;
        }

        if (*leftinterval && *rightinterval)
            return;
    }

    Assert(false);
}

/*
 * Subroutine to find penalty for caller's candidate split point.
 *
 * On leaf pages, penalty is the attribute number that distinguishes each side
 * of a split. It's the last attribute that needs to be included in new high
 * key for left page. It can be greater than the number of key attributes in
 * cases where a heap TID will need to be appended during truncation.
 *
 * On internal pages, penalty is simply the size of the firstright tuple for
 * the split (including line pointer overhead). This tuple will become the
 * new high key for the left page.
 */
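/*
 * Example (hypothetical tuples in a two-column leaf index on (a, b)):
 * lastleft = (5, 'apple') and firstright = (5, 'banana') gives penalty 2,
 * since b is the first attribute distinguishing the sides, while
 * lastleft = (5, 'apple') and firstright = (6, 'apple') gives the lower
 * (better) penalty 1, letting suffix truncation drop b from the high key.
 */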
static inline int
_bt_split_penalty(FindSplitData *state, SplitPoint *split)
{
    IndexTuple  lastleft;
    IndexTuple  firstright;

    if (!state->is_leaf)
    {
        ItemId      itemid;

        if (!split->newitemonleft &&
            split->firstrightoff == state->newitemoff)
            return state->newitemsz;

        itemid = PageGetItemId(state->origpage, split->firstrightoff);

        return MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
    }

    lastleft = _bt_split_lastleft(state, split);
    firstright = _bt_split_firstright(state, split);

    return _bt_keep_natts_fast(state->rel, lastleft, firstright);
}

/*
 * Subroutine to get a lastleft IndexTuple for a split point
 */
static inline IndexTuple
_bt_split_lastleft(FindSplitData *state, SplitPoint *split)
{
    ItemId      itemid;

    if (split->newitemonleft && split->firstrightoff == state->newitemoff)
        return state->newitem;

    itemid = PageGetItemId(state->origpage,
                           OffsetNumberPrev(split->firstrightoff));
    return (IndexTuple) PageGetItem(state->origpage, itemid);
}

/*
 * Subroutine to get a firstright IndexTuple for a split point
 */
static inline IndexTuple
_bt_split_firstright(FindSplitData *state, SplitPoint *split)
{
    ItemId      itemid;

    if (!split->newitemonleft && split->firstrightoff == state->newitemoff)
        return state->newitem;

    itemid = PageGetItemId(state->origpage, split->firstrightoff);
    return (IndexTuple) PageGetItem(state->origpage, itemid);
}