Age Owner Branch data TLA Line data Source code
1 : : /*-------------------------------------------------------------------------
2 : : *
3 : : * gistsplit.c
4 : : * Multi-column page splitting algorithm
5 : : *
6 : : * This file is concerned with making good page-split decisions in multi-column
7 : : * GiST indexes. The opclass-specific picksplit functions can only be expected
8 : : * to produce answers based on a single column. We first run the picksplit
9 : : * function for column 1; then, if there are more columns, we check if any of
10 : : * the tuples are "don't cares" so far as the column 1 split is concerned
11 : : * (that is, they could go to either side for no additional penalty). If so,
12 : : * we try to redistribute those tuples on the basis of the next column.
13 : : * Repeat till we're out of columns.
14 : : *
15 : : * gistSplitByKey() is the entry point to this file.
16 : : *
17 : : *
18 : : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
19 : : * Portions Copyright (c) 1994, Regents of the University of California
20 : : *
21 : : * IDENTIFICATION
22 : : * src/backend/access/gist/gistsplit.c
23 : : *
24 : : *-------------------------------------------------------------------------
25 : : */
26 : : #include "postgres.h"
27 : :
28 : : #include "access/gist_private.h"
29 : : #include "utils/rel.h"
30 : :
31 : : typedef struct
32 : : {
33 : : OffsetNumber *entries;
34 : : int len;
35 : : Datum *attr;
36 : : bool *isnull;
37 : : bool *dontcare;
38 : : } GistSplitUnion;
39 : :
40 : :
41 : : /*
42 : : * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
43 : : * ignoring any tuples that are marked in gsvp->dontcare[]. Subroutine for
44 : : * gistunionsubkey.
45 : : */
46 : : static void
6402 bruce@momjian.us 47 :CBC 2938 : gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
48 : : GistSplitUnion *gsvp)
49 : : {
50 : : IndexTuple *cleanedItVec;
51 : : int i,
52 : 2938 : cleanedLen = 0;
53 : :
54 : 2938 : cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
55 : :
56 [ + + ]: 140584 : for (i = 0; i < gsvp->len; i++)
57 : : {
4081 tgl@sss.pgh.pa.us 58 [ + + - + ]: 137646 : if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
6500 teodor@sigaev.ru 59 :UBC 0 : continue;
60 : :
6500 teodor@sigaev.ru 61 :CBC 137646 : cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
62 : : }
63 : :
4084 tgl@sss.pgh.pa.us 64 : 2938 : gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen,
65 : : gsvp->attr, gsvp->isnull);
66 : :
6402 bruce@momjian.us 67 : 2938 : pfree(cleanedItVec);
6500 teodor@sigaev.ru 68 : 2938 : }
69 : :
70 : : /*
71 : : * Recompute unions of left- and right-side subkeys after a page split,
72 : : * ignoring any tuples that are marked in spl->spl_dontcare[].
73 : : *
74 : : * Note: we always recompute union keys for all index columns. In some cases
75 : : * this might represent duplicate work for the leftmost column(s), but it's
76 : : * not safe to assume that "zero penalty to move a tuple" means "the union
77 : : * key doesn't change at all". Penalty functions aren't 100% accurate.
78 : : */
79 : : static void
4084 tgl@sss.pgh.pa.us 80 : 1469 : gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
81 : : {
82 : : GistSplitUnion gsvp;
83 : :
4081 84 : 1469 : gsvp.dontcare = spl->spl_dontcare;
85 : :
6500 teodor@sigaev.ru 86 : 1469 : gsvp.entries = spl->splitVector.spl_left;
4084 tgl@sss.pgh.pa.us 87 : 1469 : gsvp.len = spl->splitVector.spl_nleft;
88 : 1469 : gsvp.attr = spl->spl_lattr;
6500 teodor@sigaev.ru 89 : 1469 : gsvp.isnull = spl->spl_lisnull;
90 : :
4084 tgl@sss.pgh.pa.us 91 : 1469 : gistunionsubkeyvec(giststate, itvec, &gsvp);
92 : :
6500 teodor@sigaev.ru 93 : 1469 : gsvp.entries = spl->splitVector.spl_right;
4084 tgl@sss.pgh.pa.us 94 : 1469 : gsvp.len = spl->splitVector.spl_nright;
95 : 1469 : gsvp.attr = spl->spl_rattr;
6500 teodor@sigaev.ru 96 : 1469 : gsvp.isnull = spl->spl_risnull;
97 : :
4084 tgl@sss.pgh.pa.us 98 : 1469 : gistunionsubkeyvec(giststate, itvec, &gsvp);
6500 teodor@sigaev.ru 99 : 1469 : }
100 : :
101 : : /*
102 : : * Find tuples that are "don't cares", that is could be moved to the other
103 : : * side of the split with zero penalty, so far as the attno column is
104 : : * concerned.
105 : : *
106 : : * Don't-care tuples are marked by setting the corresponding entry in
107 : : * spl->spl_dontcare[] to "true". Caller must have initialized that array
108 : : * to zeroes.
109 : : *
110 : : * Returns number of don't-cares found.
111 : : */
112 : : static int
4081 tgl@sss.pgh.pa.us 113 : 1264 : findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
114 : : GistSplitVector *spl, int attno)
115 : : {
116 : : int i;
117 : : GISTENTRY entry;
118 : 1264 : int NumDontCare = 0;
119 : :
120 : : /*
121 : : * First, search the left-side tuples to see if any have zero penalty to
122 : : * be added to the right-side union key.
123 : : *
124 : : * attno column is known all-not-null (see gistSplitByKey), so we need not
125 : : * check for nulls
126 : : */
127 : 1264 : gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
128 : : (OffsetNumber) 0, false);
6402 bruce@momjian.us 129 [ + + ]: 62143 : for (i = 0; i < spl->splitVector.spl_nleft; i++)
130 : : {
4081 tgl@sss.pgh.pa.us 131 : 60879 : int j = spl->splitVector.spl_left[i];
6402 bruce@momjian.us 132 : 60879 : float penalty = gistpenalty(giststate, attno, &entry, false,
4081 tgl@sss.pgh.pa.us 133 : 60879 : &valvec[j], false);
134 : :
6402 bruce@momjian.us 135 [ + + ]: 60879 : if (penalty == 0.0)
136 : : {
4081 tgl@sss.pgh.pa.us 137 : 92 : spl->spl_dontcare[j] = true;
138 : 92 : NumDontCare++;
139 : : }
140 : : }
141 : :
142 : : /* And conversely for the right-side tuples */
143 : 1264 : gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
144 : : (OffsetNumber) 0, false);
6402 bruce@momjian.us 145 [ + + ]: 63403 : for (i = 0; i < spl->splitVector.spl_nright; i++)
146 : : {
4081 tgl@sss.pgh.pa.us 147 : 62139 : int j = spl->splitVector.spl_right[i];
6402 bruce@momjian.us 148 : 62139 : float penalty = gistpenalty(giststate, attno, &entry, false,
4081 tgl@sss.pgh.pa.us 149 : 62139 : &valvec[j], false);
150 : :
6402 bruce@momjian.us 151 [ + + ]: 62139 : if (penalty == 0.0)
152 : : {
4081 tgl@sss.pgh.pa.us 153 : 92 : spl->spl_dontcare[j] = true;
154 : 92 : NumDontCare++;
155 : : }
156 : : }
157 : :
158 : 1264 : return NumDontCare;
159 : : }
160 : :
161 : : /*
162 : : * Remove tuples that are marked don't-cares from the tuple index array a[]
163 : : * of length *len. This is applied separately to the spl_left and spl_right
164 : : * arrays.
165 : : */
166 : : static void
167 : 2 : removeDontCares(OffsetNumber *a, int *len, const bool *dontcare)
168 : : {
169 : : int origlen,
170 : : newlen,
171 : : i;
172 : : OffsetNumber *curwpos;
173 : :
174 : 2 : origlen = newlen = *len;
6500 teodor@sigaev.ru 175 : 2 : curwpos = a;
4081 tgl@sss.pgh.pa.us 176 [ + + ]: 188 : for (i = 0; i < origlen; i++)
177 : : {
178 : 186 : OffsetNumber ai = a[i];
179 : :
2433 peter_e@gmx.net 180 [ + + ]: 186 : if (dontcare[ai] == false)
181 : : {
182 : : /* re-emit item into a[] */
4081 tgl@sss.pgh.pa.us 183 : 2 : *curwpos = ai;
6500 teodor@sigaev.ru 184 : 2 : curwpos++;
185 : : }
186 : : else
4081 tgl@sss.pgh.pa.us 187 : 184 : newlen--;
188 : : }
189 : :
190 : 2 : *len = newlen;
6500 teodor@sigaev.ru 191 : 2 : }
192 : :
193 : : /*
194 : : * Place a single don't-care tuple into either the left or right side of the
195 : : * split, according to which has least penalty for merging the tuple into
196 : : * the previously-computed union keys. We need consider only columns starting
197 : : * at attno.
198 : : */
199 : : static void
4081 tgl@sss.pgh.pa.us 200 :UBC 0 : placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
201 : : IndexTuple itup, OffsetNumber off, int attno)
202 : : {
203 : : GISTENTRY identry[INDEX_MAX_KEYS];
204 : : bool isnull[INDEX_MAX_KEYS];
6402 bruce@momjian.us 205 : 0 : bool toLeft = true;
206 : :
4081 tgl@sss.pgh.pa.us 207 : 0 : gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
208 : : identry, isnull);
209 : :
1862 akorotkov@postgresql 210 [ # # ]: 0 : for (; attno < giststate->nonLeafTupdesc->natts; attno++)
211 : : {
212 : : float lpenalty,
213 : : rpenalty;
214 : : GISTENTRY entry;
215 : :
2433 peter_e@gmx.net 216 : 0 : gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false);
4081 tgl@sss.pgh.pa.us 217 : 0 : lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
218 : 0 : identry + attno, isnull[attno]);
2433 peter_e@gmx.net 219 : 0 : gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false);
4081 tgl@sss.pgh.pa.us 220 : 0 : rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
221 : 0 : identry + attno, isnull[attno]);
222 : :
6402 bruce@momjian.us 223 [ # # ]: 0 : if (lpenalty != rpenalty)
224 : : {
225 [ # # ]: 0 : if (lpenalty > rpenalty)
6500 teodor@sigaev.ru 226 : 0 : toLeft = false;
227 : 0 : break;
228 : : }
229 : : }
230 : :
6402 bruce@momjian.us 231 [ # # ]: 0 : if (toLeft)
232 : 0 : v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
233 : : else
234 : 0 : v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
6500 teodor@sigaev.ru 235 : 0 : }
236 : :
237 : : #define SWAPVAR( s, d, t ) \
238 : : do { \
239 : : (t) = (s); \
240 : : (s) = (d); \
241 : : (d) = (t); \
242 : : } while(0)
243 : :
244 : : /*
245 : : * Clean up when we did a secondary split but the user-defined PickSplit
246 : : * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
247 : : * true).
248 : : *
249 : : * We consider whether to swap the left and right outputs of the secondary
250 : : * split; this can be worthwhile if the penalty for merging those tuples into
251 : : * the previously chosen sets is less that way.
252 : : *
253 : : * In any case we must update the union datums for the current column by
254 : : * adding in the previous union keys (oldL/oldR), since the user-defined
255 : : * PickSplit method didn't do so.
256 : : */
257 : : static void
4081 tgl@sss.pgh.pa.us 258 :CBC 1 : supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
259 : : GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
260 : : {
6402 bruce@momjian.us 261 : 1 : bool leaveOnLeft = true,
262 : : tmpBool;
263 : : GISTENTRY entryL,
264 : : entryR,
265 : : entrySL,
266 : : entrySR;
267 : :
2433 peter_e@gmx.net 268 : 1 : gistentryinit(entryL, oldL, r, NULL, 0, false);
269 : 1 : gistentryinit(entryR, oldR, r, NULL, 0, false);
270 : 1 : gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
271 : 1 : gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
272 : :
6402 bruce@momjian.us 273 [ + - + - ]: 1 : if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
274 : 1 : {
275 : : float penalty1,
276 : : penalty2;
277 : :
6500 teodor@sigaev.ru 278 : 1 : penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
6402 bruce@momjian.us 279 : 1 : gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
6500 teodor@sigaev.ru 280 : 1 : penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
6402 bruce@momjian.us 281 : 1 : gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
282 : :
283 [ - + ]: 1 : if (penalty1 > penalty2)
6500 teodor@sigaev.ru 284 :UBC 0 : leaveOnLeft = false;
285 : : }
286 : : else
287 : : {
6402 bruce@momjian.us 288 [ # # ]: 0 : GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
289 : : float penalty1,
290 : : penalty2;
291 : :
292 : : /*
293 : : * There is only one previously defined union, so we just choose swap
294 : : * or not by lowest penalty for that side. We can only get here if a
295 : : * secondary split happened to have all NULLs in its column in the
296 : : * tuples that the outer recursion level had assigned to one side.
297 : : * (Note that the null checks in gistSplitByKey don't prevent the
298 : : * case, because they'll only be checking tuples that were considered
299 : : * don't-cares at the outer recursion level, not the tuples that went
300 : : * into determining the passed-down left and right union keys.)
301 : : */
6500 teodor@sigaev.ru 302 : 0 : penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
303 : 0 : penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
304 : :
6402 bruce@momjian.us 305 [ # # ]: 0 : if (penalty1 < penalty2)
949 michael@paquier.xyz 306 : 0 : leaveOnLeft = sv->spl_ldatum_exists;
307 : : else
308 : 0 : leaveOnLeft = sv->spl_rdatum_exists;
309 : : }
310 : :
6402 bruce@momjian.us 311 [ - + ]:CBC 1 : if (leaveOnLeft == false)
312 : : {
313 : : /*
314 : : * swap left and right
315 : : */
316 : : OffsetNumber *off,
317 : : noff;
318 : : Datum datum;
319 : :
6402 bruce@momjian.us 320 :UBC 0 : SWAPVAR(sv->spl_left, sv->spl_right, off);
321 : 0 : SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
322 : 0 : SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
2433 peter_e@gmx.net 323 : 0 : gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false);
324 : 0 : gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false);
325 : : }
326 : :
6402 bruce@momjian.us 327 [ + - ]:CBC 1 : if (sv->spl_ldatum_exists)
6500 teodor@sigaev.ru 328 : 1 : gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
329 : : &sv->spl_ldatum, &tmpBool);
330 : :
6402 bruce@momjian.us 331 [ + - ]: 1 : if (sv->spl_rdatum_exists)
6500 teodor@sigaev.ru 332 : 1 : gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
333 : : &sv->spl_rdatum, &tmpBool);
334 : :
335 : 1 : sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
336 : 1 : }
337 : :
338 : : /*
339 : : * Trivial picksplit implementation. Function called only
340 : : * if user-defined picksplit puts all keys on the same side of the split.
341 : : * That is a bug of user-defined picksplit but we don't want to fail.
342 : : */
343 : : static void
5487 344 : 45 : genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
345 : : {
346 : : OffsetNumber i,
347 : : maxoff;
348 : : int nbytes;
349 : : GistEntryVector *evec;
350 : :
351 : 45 : maxoff = entryvec->n - 1;
352 : :
353 : 45 : nbytes = (maxoff + 2) * sizeof(OffsetNumber);
354 : :
355 : 45 : v->spl_left = (OffsetNumber *) palloc(nbytes);
356 : 45 : v->spl_right = (OffsetNumber *) palloc(nbytes);
357 : 45 : v->spl_nleft = v->spl_nright = 0;
358 : :
359 [ + + ]: 12555 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
360 : : {
361 [ + + ]: 12510 : if (i <= (maxoff - FirstOffsetNumber + 1) / 2)
362 : : {
363 : 6255 : v->spl_left[v->spl_nleft] = i;
364 : 6255 : v->spl_nleft++;
365 : : }
366 : : else
367 : : {
368 : 6255 : v->spl_right[v->spl_nright] = i;
369 : 6255 : v->spl_nright++;
370 : : }
371 : : }
372 : :
373 : : /*
374 : : * Form union datums for each side
375 : : */
5421 bruce@momjian.us 376 : 45 : evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
377 : :
5487 teodor@sigaev.ru 378 : 45 : evec->n = v->spl_nleft;
5421 bruce@momjian.us 379 : 45 : memcpy(evec->vector, entryvec->vector + FirstOffsetNumber,
380 : 45 : sizeof(GISTENTRY) * evec->n);
4741 tgl@sss.pgh.pa.us 381 : 45 : v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno],
382 : : giststate->supportCollation[attno],
383 : : PointerGetDatum(evec),
384 : : PointerGetDatum(&nbytes));
385 : :
5487 teodor@sigaev.ru 386 : 45 : evec->n = v->spl_nright;
5421 bruce@momjian.us 387 : 45 : memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft,
388 : 45 : sizeof(GISTENTRY) * evec->n);
4741 tgl@sss.pgh.pa.us 389 : 45 : v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno],
390 : : giststate->supportCollation[attno],
391 : : PointerGetDatum(evec),
392 : : PointerGetDatum(&nbytes));
5487 teodor@sigaev.ru 393 : 45 : }
394 : :
395 : : /*
396 : : * Calls user picksplit method for attno column to split tuples into
397 : : * two vectors.
398 : : *
399 : : * Returns false if split is complete (there are no more index columns, or
400 : : * there is no need to consider them because split is optimal already).
401 : : *
402 : : * Returns true and v->spl_dontcare = NULL if the picksplit result is
403 : : * degenerate (all tuples seem to be don't-cares), so we should just
404 : : * disregard this column and split on the next column(s) instead.
405 : : *
406 : : * Returns true and v->spl_dontcare != NULL if there are don't-care tuples
407 : : * that could be relocated based on the next column(s). The don't-care
408 : : * tuples have been removed from the split and must be reinserted by caller.
409 : : * There is at least one non-don't-care tuple on each side of the split,
410 : : * and union keys for all columns are updated to include just those tuples.
411 : : *
412 : : * A true result implies there is at least one more index column.
413 : : */
414 : : static bool
6500 415 : 13209 : gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
416 : : IndexTuple *itup, int len, GISTSTATE *giststate)
417 : : {
418 : 13209 : GIST_SPLITVEC *sv = &v->splitVector;
419 : :
420 : : /*
421 : : * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
422 : : * case we are doing a secondary split (see comments in gist.h).
423 : : */
916 michael@paquier.xyz 424 : 13209 : sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
425 : 13209 : sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
6500 teodor@sigaev.ru 426 : 13209 : sv->spl_ldatum = v->spl_lattr[attno];
427 : 13209 : sv->spl_rdatum = v->spl_rattr[attno];
428 : :
429 : : /*
430 : : * Let the opclass-specific PickSplit method do its thing. Note that at
431 : : * this point we know there are no null keys in the entryvec.
432 : : */
4741 tgl@sss.pgh.pa.us 433 : 13209 : FunctionCall2Coll(&giststate->picksplitFn[attno],
434 : : giststate->supportCollation[attno],
435 : : PointerGetDatum(entryvec),
436 : : PointerGetDatum(sv));
437 : :
5421 bruce@momjian.us 438 [ + + - + ]: 13209 : if (sv->spl_nleft == 0 || sv->spl_nright == 0)
439 : : {
440 : : /*
441 : : * User-defined picksplit failed to create an actual split, ie it put
442 : : * everything on the same side. Complain but cope.
443 : : */
5487 teodor@sigaev.ru 444 [ - + ]: 45 : ereport(DEBUG1,
445 : : (errcode(ERRCODE_INTERNAL_ERROR),
446 : : errmsg("picksplit method for column %d of index \"%s\" failed",
447 : : attno + 1, RelationGetRelationName(r)),
448 : : errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
449 : :
450 : : /*
451 : : * Reinit GIST_SPLITVEC. Although these fields are not used by
452 : : * genericPickSplit(), set them up for further processing
453 : : */
916 michael@paquier.xyz 454 : 45 : sv->spl_ldatum_exists = !(v->spl_lisnull[attno]);
455 : 45 : sv->spl_rdatum_exists = !(v->spl_risnull[attno]);
5487 teodor@sigaev.ru 456 : 45 : sv->spl_ldatum = v->spl_lattr[attno];
457 : 45 : sv->spl_rdatum = v->spl_rattr[attno];
458 : :
459 : : /* Do a generic split */
460 : 45 : genericPickSplit(giststate, entryvec, sv, attno);
461 : : }
462 : : else
463 : : {
464 : : /* hack for compatibility with old picksplit API */
465 [ - + ]: 13164 : if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
5487 teodor@sigaev.ru 466 :UBC 0 : sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
5487 teodor@sigaev.ru 467 [ - + ]:CBC 13164 : if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
5487 teodor@sigaev.ru 468 :UBC 0 : sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
469 : : }
470 : :
471 : : /* Clean up if PickSplit didn't take care of a secondary split */
4081 tgl@sss.pgh.pa.us 472 [ + + - + ]:CBC 13209 : if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
473 : 1 : supportSecondarySplit(r, giststate, attno, sv,
474 : : v->spl_lattr[attno], v->spl_rattr[attno]);
475 : :
476 : : /* emit union datums computed by PickSplit back to v arrays */
6500 teodor@sigaev.ru 477 : 13209 : v->spl_lattr[attno] = sv->spl_ldatum;
478 : 13209 : v->spl_rattr[attno] = sv->spl_rdatum;
479 : 13209 : v->spl_lisnull[attno] = false;
480 : 13209 : v->spl_risnull[attno] = false;
481 : :
482 : : /*
483 : : * If index columns remain, then consider whether we can improve the split
484 : : * by using them.
485 : : */
4081 tgl@sss.pgh.pa.us 486 : 13209 : v->spl_dontcare = NULL;
487 : :
1862 akorotkov@postgresql 488 [ + + ]: 13209 : if (attno + 1 < giststate->nonLeafTupdesc->natts)
489 : : {
490 : : int NumDontCare;
491 : :
492 : : /*
493 : : * Make a quick check to see if left and right union keys are equal;
494 : : * if so, the split is certainly degenerate, so tell caller to
495 : : * re-split with the next column.
496 : : */
6402 bruce@momjian.us 497 [ + + ]: 1282 : if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
6500 teodor@sigaev.ru 498 : 18 : return true;
499 : :
500 : : /*
501 : : * Locate don't-care tuples, if any. If there are none, the split is
502 : : * optimal, so just fall out and return false.
503 : : */
4081 tgl@sss.pgh.pa.us 504 : 1264 : v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
505 : :
506 : 1264 : NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
507 : :
508 [ + + ]: 1264 : if (NumDontCare > 0)
509 : : {
510 : : /*
511 : : * Remove don't-cares from spl_left[] and spl_right[].
512 : : */
513 : 1 : removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare);
514 : 1 : removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare);
515 : :
516 : : /*
517 : : * If all tuples on either side were don't-cares, the split is
518 : : * degenerate, and we're best off to ignore it and split on the
519 : : * next column. (We used to try to press on with a secondary
520 : : * split by forcing a random tuple on each side to be treated as
521 : : * non-don't-care, but it seems unlikely that that technique
522 : : * really gives a better result. Note that we don't want to try a
523 : : * secondary split with empty left or right primary split sides,
524 : : * because then there is no union key on that side for the
525 : : * PickSplit function to try to expand, so it can have no good
526 : : * figure of merit for what it's doing. Also note that this check
527 : : * ensures we can't produce a bogus one-side-only split in the
528 : : * NumDontCare == 1 special case below.)
529 : : */
530 [ + - - + ]: 1 : if (sv->spl_nleft == 0 || sv->spl_nright == 0)
531 : : {
4081 tgl@sss.pgh.pa.us 532 :UBC 0 : v->spl_dontcare = NULL;
533 : 0 : return true;
534 : : }
535 : :
536 : : /*
537 : : * Recompute union keys, considering only non-don't-care tuples.
538 : : * NOTE: this will set union keys for remaining index columns,
539 : : * which will cause later calls of gistUserPicksplit to pass those
540 : : * values down to user-defined PickSplit methods with
541 : : * spl_ldatum_exists/spl_rdatum_exists set true.
542 : : */
4081 tgl@sss.pgh.pa.us 543 :CBC 1 : gistunionsubkey(giststate, itup, v);
544 : :
545 [ - + ]: 1 : if (NumDontCare == 1)
546 : : {
547 : : /*
548 : : * If there's only one don't-care tuple then we can't do a
549 : : * PickSplit on it, so just choose whether to send it left or
550 : : * right by comparing penalties. We needed the
551 : : * gistunionsubkey step anyway so that we have appropriate
552 : : * union keys for figuring the penalties.
553 : : */
554 : : OffsetNumber toMove;
555 : :
556 : : /* find it ... */
4081 tgl@sss.pgh.pa.us 557 [ # # ]:UBC 0 : for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
558 : : {
559 [ # # ]: 0 : if (v->spl_dontcare[toMove])
560 : 0 : break;
561 : : }
562 [ # # ]: 0 : Assert(toMove < entryvec->n);
563 : :
564 : : /* ... and assign it to cheaper side */
565 : 0 : placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
566 : :
567 : : /*
568 : : * At this point the union keys are wrong, but we don't care
569 : : * because we're done splitting. The outermost recursion
570 : : * level of gistSplitByKey will fix things before returning.
571 : : */
572 : : }
573 : : else
4081 tgl@sss.pgh.pa.us 574 :CBC 1 : return true;
575 : : }
576 : : }
577 : :
6500 teodor@sigaev.ru 578 : 13190 : return false;
579 : : }
580 : :
581 : : /*
582 : : * simply split page in half
583 : : */
584 : : static void
6402 bruce@momjian.us 585 :UBC 0 : gistSplitHalf(GIST_SPLITVEC *v, int len)
586 : : {
587 : : int i;
588 : :
589 : 0 : v->spl_nright = v->spl_nleft = 0;
6500 teodor@sigaev.ru 590 : 0 : v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
6402 bruce@momjian.us 591 : 0 : v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
592 [ # # ]: 0 : for (i = 1; i <= len; i++)
593 [ # # ]: 0 : if (i < len / 2)
594 : 0 : v->spl_right[v->spl_nright++] = i;
595 : : else
596 : 0 : v->spl_left[v->spl_nleft++] = i;
597 : :
598 : : /* we need not compute union keys, caller took care of it */
6500 teodor@sigaev.ru 599 : 0 : }
600 : :
601 : : /*
602 : : * gistSplitByKey: main entry point for page-splitting algorithm
603 : : *
604 : : * r: index relation
605 : : * page: page being split
606 : : * itup: array of IndexTuples to be processed
607 : : * len: number of IndexTuples to be processed (must be at least 2)
608 : : * giststate: additional info about index
609 : : * v: working state and output area
610 : : * attno: column we are working on (zero-based index)
611 : : *
612 : : * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
613 : : * to all-true. On return, spl_left/spl_nleft contain indexes of tuples
614 : : * to go left, spl_right/spl_nright contain indexes of tuples to go right,
615 : : * spl_lattr/spl_lisnull contain left-side union key values, and
616 : : * spl_rattr/spl_risnull contain right-side union key values. Other fields
617 : : * in this struct are workspace for this file.
618 : : *
619 : : * Outside caller must pass zero for attno. The function may internally
620 : : * recurse to the next column by passing attno+1.
621 : : */
622 : : void
4081 tgl@sss.pgh.pa.us 623 :CBC 13395 : gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
624 : : GISTSTATE *giststate, GistSplitVector *v, int attno)
625 : : {
626 : : GistEntryVector *entryvec;
627 : : OffsetNumber *offNullTuples;
6402 bruce@momjian.us 628 : 13395 : int nOffNullTuples = 0;
629 : : int i;
630 : :
631 : : /* generate the item array, and identify tuples with null keys */
632 : : /* note that entryvec->vector[0] goes unused in this code */
4081 tgl@sss.pgh.pa.us 633 : 13395 : entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
634 : 13395 : entryvec->n = len + 1;
635 : 13395 : offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
636 : :
6402 bruce@momjian.us 637 [ + + ]: 1153683 : for (i = 1; i <= len; i++)
638 : : {
639 : : Datum datum;
640 : : bool IsNull;
641 : :
1862 akorotkov@postgresql 642 : 1140288 : datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc,
643 : : &IsNull);
6500 teodor@sigaev.ru 644 : 1140288 : gistdentryinit(giststate, attno, &(entryvec->vector[i]),
645 : : datum, r, page, i,
646 : : false, IsNull);
6402 bruce@momjian.us 647 [ + + ]: 1140288 : if (IsNull)
648 : 939 : offNullTuples[nOffNullTuples++] = i;
649 : : }
650 : :
651 [ - + ]: 13395 : if (nOffNullTuples == len)
652 : : {
653 : : /*
654 : : * Corner case: All keys in attno column are null, so just transfer
655 : : * our attention to the next column. If there's no next column, just
656 : : * split page in half.
657 : : */
2433 peter_e@gmx.net 658 :UBC 0 : v->spl_risnull[attno] = v->spl_lisnull[attno] = true;
659 : :
1862 akorotkov@postgresql 660 [ # # ]: 0 : if (attno + 1 < giststate->nonLeafTupdesc->natts)
4081 tgl@sss.pgh.pa.us 661 : 0 : gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
662 : : else
663 : 0 : gistSplitHalf(&v->splitVector, len);
664 : : }
6402 bruce@momjian.us 665 [ + + ]:CBC 13395 : else if (nOffNullTuples > 0)
666 : : {
667 : 186 : int j = 0;
668 : :
669 : : /*
670 : : * We don't want to mix NULL and not-NULL keys on one page, so split
671 : : * nulls to right page and not-nulls to left.
672 : : */
6500 teodor@sigaev.ru 673 : 186 : v->splitVector.spl_right = offNullTuples;
674 : 186 : v->splitVector.spl_nright = nOffNullTuples;
2433 peter_e@gmx.net 675 : 186 : v->spl_risnull[attno] = true;
676 : :
6500 teodor@sigaev.ru 677 : 186 : v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
678 : 186 : v->splitVector.spl_nleft = 0;
6402 bruce@momjian.us 679 [ + + ]: 11464 : for (i = 1; i <= len; i++)
680 [ + + + + ]: 11278 : if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
6500 teodor@sigaev.ru 681 : 939 : j++;
682 : : else
6402 bruce@momjian.us 683 : 10339 : v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
684 : :
685 : : /* Compute union keys, unless outer recursion level will handle it */
1862 akorotkov@postgresql 686 [ + - + + ]: 186 : if (attno == 0 && giststate->nonLeafTupdesc->natts == 1)
687 : : {
4081 tgl@sss.pgh.pa.us 688 : 185 : v->spl_dontcare = NULL;
689 : 185 : gistunionsubkey(giststate, itup, v);
690 : : }
691 : : }
692 : : else
693 : : {
694 : : /*
695 : : * All keys are not-null, so apply user-defined PickSplit method
696 : : */
697 [ + + ]: 13209 : if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
698 : : {
699 : : /*
700 : : * Splitting on attno column is not optimal, so consider
701 : : * redistributing don't-care tuples according to the next column
702 : : */
1862 akorotkov@postgresql 703 [ - + ]: 19 : Assert(attno + 1 < giststate->nonLeafTupdesc->natts);
704 : :
4081 tgl@sss.pgh.pa.us 705 [ + + ]: 19 : if (v->spl_dontcare == NULL)
706 : : {
707 : : /*
708 : : * This split was actually degenerate, so ignore it altogether
709 : : * and just split according to the next column.
710 : : */
711 : 18 : gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
712 : : }
713 : : else
714 : : {
715 : : /*
716 : : * Form an array of just the don't-care tuples to pass to a
717 : : * recursive invocation of this function for the next column.
718 : : */
719 : 1 : IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
720 : 1 : OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
6402 bruce@momjian.us 721 : 1 : int newlen = 0;
722 : : GIST_SPLITVEC backupSplit;
723 : :
724 [ + + ]: 187 : for (i = 0; i < len; i++)
725 : : {
4081 tgl@sss.pgh.pa.us 726 [ + + ]: 186 : if (v->spl_dontcare[i + 1])
727 : : {
728 : 184 : newitup[newlen] = itup[i];
6402 bruce@momjian.us 729 : 184 : map[newlen] = i + 1;
4081 tgl@sss.pgh.pa.us 730 : 184 : newlen++;
731 : : }
732 : : }
733 : :
6402 bruce@momjian.us 734 [ - + ]: 1 : Assert(newlen > 0);
735 : :
736 : : /*
737 : : * Make a backup copy of v->splitVector, since the recursive
738 : : * call will overwrite that with its own result.
739 : : */
4081 tgl@sss.pgh.pa.us 740 : 1 : backupSplit = v->splitVector;
6402 bruce@momjian.us 741 : 1 : backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
742 : 1 : memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
743 : 1 : backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
744 : 1 : memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
745 : :
746 : : /* Recursively decide how to split the don't-care tuples */
4081 tgl@sss.pgh.pa.us 747 : 1 : gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
748 : :
749 : : /* Merge result of subsplit with non-don't-care tuples */
6402 bruce@momjian.us 750 [ + + ]: 93 : for (i = 0; i < v->splitVector.spl_nleft; i++)
751 : 92 : backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
752 [ + + ]: 93 : for (i = 0; i < v->splitVector.spl_nright; i++)
753 : 92 : backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
754 : :
6500 teodor@sigaev.ru 755 : 1 : v->splitVector = backupSplit;
756 : : }
757 : : }
758 : : }
759 : :
760 : : /*
761 : : * If we're handling a multicolumn index, at the end of the recursion
762 : : * recompute the left and right union datums for all index columns. This
763 : : * makes sure we hand back correct union datums in all corner cases,
764 : : * including when we haven't processed all columns to start with, or when
765 : : * a secondary split moved "don't care" tuples from one side to the other
766 : : * (we really shouldn't assume that that didn't change the union datums).
767 : : *
768 : : * Note: when we're in an internal recursion (attno > 0), we do not worry
769 : : * about whether the union datums we return with are sensible, since
770 : : * calling levels won't care. Also, in a single-column index, we expect
771 : : * that PickSplit (or the special cases above) produced correct union
772 : : * datums.
773 : : */
1862 akorotkov@postgresql 774 [ + + + + ]: 13395 : if (attno == 0 && giststate->nonLeafTupdesc->natts > 1)
775 : : {
4081 tgl@sss.pgh.pa.us 776 : 1283 : v->spl_dontcare = NULL;
777 : 1283 : gistunionsubkey(giststate, itup, v);
778 : : }
6500 teodor@sigaev.ru 779 : 13395 : }
|