Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * gistutil.c
4 : * utilities routines for the postgres GiST index access method.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/gist/gistutil.c
12 : *-------------------------------------------------------------------------
13 : */
14 : #include "postgres.h"
15 :
16 : #include <math.h>
17 :
18 : #include "access/gist_private.h"
19 : #include "access/htup_details.h"
20 : #include "access/reloptions.h"
21 : #include "catalog/pg_opclass.h"
22 : #include "common/pg_prng.h"
23 : #include "storage/indexfsm.h"
24 : #include "storage/lmgr.h"
25 : #include "utils/float.h"
26 : #include "utils/lsyscache.h"
27 : #include "utils/snapmgr.h"
28 : #include "utils/syscache.h"
29 :
30 : /*
31 : * Write itup vector to page, has no control of free space.
32 : */
33 : void
5414 heikki.linnakangas 34 CBC 571797 : gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off)
35 : {
36 : int i;
37 :
6408 bruce 38 571797 : if (off == InvalidOffsetNumber)
39 1140368 : off = (PageIsEmpty(page)) ? FirstOffsetNumber :
6494 teodor 40 569472 : OffsetNumberNext(PageGetMaxOffsetNumber(page));
41 :
6508 42 1229750 : for (i = 0; i < len; i++)
43 : {
5050 bruce 44 657953 : Size sz = IndexTupleSize(itup[i]);
45 : OffsetNumber l;
46 :
5414 heikki.linnakangas 47 657953 : l = PageAddItem(page, (Item) itup[i], sz, off, false, false);
6508 teodor 48 657953 : if (l == InvalidOffsetNumber)
5414 heikki.linnakangas 49 UBC 0 : elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes",
50 : i, len, (int) sz);
6508 teodor 51 CBC 657953 : off++;
52 : }
53 571797 : }
54 :
55 : /*
56 : * Check space for itup vector on page
57 : */
58 : bool
6125 bruce 59 847063 : gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace)
60 : {
6031 61 847063 : unsigned int size = freespace,
62 847063 : deleted = 0;
63 : int i;
64 :
6508 teodor 65 1706232 : for (i = 0; i < len; i++)
66 859169 : size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
67 :
6031 bruce 68 847063 : if (todelete != InvalidOffsetNumber)
69 : {
70 373764 : IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete));
71 :
6178 teodor 72 373764 : deleted = IndexTupleSize(itup) + sizeof(ItemIdData);
73 : }
74 :
75 847063 : return (PageGetFreeSpace(page) + deleted < size);
76 : }
77 :
78 : bool
6031 bruce 79 25768 : gistfitpage(IndexTuple *itvec, int len)
80 : {
81 : int i;
82 25768 : Size size = 0;
83 :
84 1146646 : for (i = 0; i < len; i++)
6178 teodor 85 1120878 : size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData);
86 :
87 : /* TODO: Consider fillfactor */
88 25768 : return (size <= GiSTPageSize);
89 : }
90 :
91 : /*
92 : * Read buffer into itup vector
93 : */
94 : IndexTuple *
6171 95 12832 : gistextractpage(Page page, int *len /* out */ )
96 : {
97 : OffsetNumber i,
98 : maxoff;
99 : IndexTuple *itvec;
100 :
101 12832 : maxoff = PageGetMaxOffsetNumber(page);
6508 102 12832 : *len = maxoff;
103 12832 : itvec = palloc(sizeof(IndexTuple) * maxoff);
104 996127 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
6171 105 983295 : itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
106 :
6508 107 12832 : return itvec;
108 : }
109 :
110 : /*
111 : * join two vectors into one
112 : */
113 : IndexTuple *
114 12692 : gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen)
115 : {
61 peter 116 GNC 12692 : itvec = (IndexTuple *) repalloc(itvec, sizeof(IndexTuple) * ((*len) + addlen));
6508 teodor 117 CBC 12692 : memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen);
118 12692 : *len += addlen;
119 12692 : return itvec;
120 : }
121 :
122 : /*
123 : * make plain IndexTuple vector
124 : */
125 :
126 : IndexTupleData *
6031 bruce 127 25514 : gistfillitupvec(IndexTuple *vec, int veclen, int *memlen)
128 : {
129 : char *ptr,
130 : *ret;
131 : int i;
132 :
133 25514 : *memlen = 0;
134 :
6169 teodor 135 1021476 : for (i = 0; i < veclen; i++)
136 995962 : *memlen += IndexTupleSize(vec[i]);
137 :
138 25514 : ptr = ret = palloc(*memlen);
139 :
6031 bruce 140 1021476 : for (i = 0; i < veclen; i++)
141 : {
6169 teodor 142 995962 : memcpy(ptr, vec[i], IndexTupleSize(vec[i]));
143 995962 : ptr += IndexTupleSize(vec[i]);
144 : }
145 :
6031 bruce 146 25514 : return (IndexTupleData *) ret;
147 : }
148 :
149 : /*
150 : * Make unions of keys in IndexTuple vector (one union datum per index column).
151 : * Union Datums are returned into the attr/isnull arrays.
152 : * Resulting Datums aren't compressed.
153 : */
154 : void
3713 tgl 155 2745 : gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
156 : Datum *attr, bool *isnull)
157 : {
158 : int i;
159 : GistEntryVector *evec;
160 : int attrsize;
161 :
6031 bruce 162 2745 : evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ);
163 :
1491 akorotkov 164 8058 : for (i = 0; i < giststate->nonLeafTupdesc->natts; i++)
165 : {
166 : int j;
167 :
168 : /* Collect non-null datums for this column */
6164 teodor 169 5313 : evec->n = 0;
6031 bruce 170 269014 : for (j = 0; j < len; j++)
171 : {
172 : Datum datum;
173 : bool IsNull;
174 :
1491 akorotkov 175 263701 : datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc,
176 : &IsNull);
6508 teodor 177 263701 : if (IsNull)
178 870 : continue;
179 :
180 262831 : gistdentryinit(giststate, i,
6164 181 262831 : evec->vector + evec->n,
182 : datum,
183 : NULL, NULL, (OffsetNumber) 0,
184 : false, IsNull);
185 262831 : evec->n++;
186 : }
187 :
188 : /* If this column was all NULLs, the union is NULL */
6031 bruce 189 5313 : if (evec->n == 0)
190 : {
6508 teodor 191 89 : attr[i] = (Datum) 0;
2062 peter_e 192 89 : isnull[i] = true;
193 : }
194 : else
195 : {
6031 bruce 196 5224 : if (evec->n == 1)
197 : {
198 : /* unionFn may expect at least two inputs */
6508 teodor 199 4 : evec->n = 2;
6164 200 4 : evec->vector[1] = evec->vector[0];
201 : }
202 :
203 : /* Make union and store in attr array */
4370 tgl 204 5224 : attr[i] = FunctionCall2Coll(&giststate->unionFn[i],
205 : giststate->supportCollation[i],
206 : PointerGetDatum(evec),
207 : PointerGetDatum(&attrsize));
208 :
2062 peter_e 209 5224 : isnull[i] = false;
210 : }
211 : }
6164 teodor 212 2745 : }
213 :
214 : /*
215 : * Return an IndexTuple containing the result of applying the "union"
216 : * method to the specified IndexTuple vector.
217 : */
218 : IndexTuple
219 3 : gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate)
220 : {
221 : Datum attr[INDEX_MAX_KEYS];
222 : bool isnull[INDEX_MAX_KEYS];
223 :
3713 tgl 224 3 : gistMakeUnionItVec(giststate, itvec, len, attr, isnull);
225 :
226 3 : return gistFormTuple(giststate, r, attr, isnull, false);
227 : }
228 :
229 : /*
230 : * makes union of two key
231 : */
232 : void
6031 bruce 233 695695 : gistMakeUnionKey(GISTSTATE *giststate, int attno,
234 : GISTENTRY *entry1, bool isnull1,
235 : GISTENTRY *entry2, bool isnull2,
236 : Datum *dst, bool *dstisnull)
237 : {
238 : /* we need a GistEntryVector with room for exactly 2 elements */
239 : union
240 : {
241 : GistEntryVector gev;
242 : char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ];
243 : } storage;
3713 tgl 244 695695 : GistEntryVector *evec = &storage.gev;
245 : int dstsize;
246 :
6164 teodor 247 695695 : evec->n = 2;
248 :
6031 bruce 249 695695 : if (isnull1 && isnull2)
250 : {
2062 peter_e 251 8357 : *dstisnull = true;
6031 bruce 252 8357 : *dst = (Datum) 0;
253 : }
254 : else
255 : {
2062 peter_e 256 687338 : if (isnull1 == false && isnull2 == false)
257 : {
6164 teodor 258 687098 : evec->vector[0] = *entry1;
259 687098 : evec->vector[1] = *entry2;
260 : }
2062 peter_e 261 240 : else if (isnull1 == false)
262 : {
6164 teodor 263 240 : evec->vector[0] = *entry1;
264 240 : evec->vector[1] = *entry1;
265 : }
266 : else
267 : {
6164 teodor 268 UBC 0 : evec->vector[0] = *entry2;
269 0 : evec->vector[1] = *entry2;
270 : }
271 :
2062 peter_e 272 CBC 687338 : *dstisnull = false;
4370 tgl 273 687338 : *dst = FunctionCall2Coll(&giststate->unionFn[attno],
274 : giststate->supportCollation[attno],
275 : PointerGetDatum(evec),
276 : PointerGetDatum(&dstsize));
277 : }
6164 teodor 278 695695 : }
279 :
280 : bool
6031 bruce 281 596884 : gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b)
282 : {
194 peter 283 GNC 596884 : bool result = false; /* silence compiler warning */
284 :
4370 tgl 285 CBC 596884 : FunctionCall3Coll(&giststate->equalFn[attno],
286 : giststate->supportCollation[attno],
287 : a, b,
288 : PointerGetDatum(&result));
6159 teodor 289 596884 : return result;
290 : }
291 :
292 : /*
293 : * Decompress all keys in tuple
294 : */
295 : void
6129 296 1820664 : gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
297 : OffsetNumber o, GISTENTRY *attdata, bool *isnull)
298 : {
299 : int i;
300 :
1491 akorotkov 301 3922623 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
302 : {
303 : Datum datum;
304 :
305 2101959 : datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
6129 teodor 306 2101959 : gistdentryinit(giststate, i, &attdata[i],
307 : datum, r, p, o,
2062 peter_e 308 2101959 : false, isnull[i]);
309 : }
6129 teodor 310 1820664 : }
311 :
312 : /*
313 : * Forms union of oldtup and addtup, if union == oldtup then return NULL
314 : */
315 : IndexTuple
6508 316 601928 : gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate)
317 : {
2062 peter_e 318 601928 : bool neednew = false;
319 : GISTENTRY oldentries[INDEX_MAX_KEYS],
320 : addentries[INDEX_MAX_KEYS];
321 : bool oldisnull[INDEX_MAX_KEYS],
322 : addisnull[INDEX_MAX_KEYS];
323 : Datum attr[INDEX_MAX_KEYS];
324 : bool isnull[INDEX_MAX_KEYS];
6508 teodor 325 601928 : IndexTuple newtup = NULL;
326 : int i;
327 :
328 601928 : gistDeCompressAtt(giststate, r, oldtup, NULL,
329 : (OffsetNumber) 0, oldentries, oldisnull);
330 :
331 601928 : gistDeCompressAtt(giststate, r, addtup, NULL,
332 : (OffsetNumber) 0, addentries, addisnull);
333 :
1491 akorotkov 334 1297621 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
335 : {
6031 bruce 336 695693 : gistMakeUnionKey(giststate, i,
337 695693 : oldentries + i, oldisnull[i],
338 695693 : addentries + i, addisnull[i],
3713 tgl 339 695693 : attr + i, isnull + i);
340 :
6031 bruce 341 695693 : if (neednew)
342 : /* we already need new key, so we can skip check */
6164 teodor 343 91494 : continue;
344 :
3713 tgl 345 604199 : if (isnull[i])
346 : /* union of key may be NULL if and only if both keys are NULL */
6164 teodor 347 8357 : continue;
348 :
6031 bruce 349 595842 : if (!addisnull[i])
350 : {
3713 tgl 351 595602 : if (oldisnull[i] ||
352 595602 : !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i]))
6164 teodor 353 376526 : neednew = true;
354 : }
355 : }
356 :
6508 357 601928 : if (neednew)
358 : {
359 : /* need to update key */
3713 tgl 360 376526 : newtup = gistFormTuple(giststate, r, attr, isnull, false);
6508 teodor 361 376526 : newtup->t_tid = oldtup->t_tid;
362 : }
363 :
364 601928 : return newtup;
365 : }
366 :
367 : /*
368 : * Search an upper index page for the entry with lowest penalty for insertion
369 : * of the new index key contained in "it".
370 : *
371 : * Returns the index of the page entry to insert into.
372 : */
373 : OffsetNumber
374 587057 : gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
375 : GISTSTATE *giststate)
376 : {
377 : OffsetNumber result;
378 : OffsetNumber maxoff;
379 : OffsetNumber i;
380 : float best_penalty[INDEX_MAX_KEYS];
381 : GISTENTRY entry,
382 : identry[INDEX_MAX_KEYS];
383 : bool isnull[INDEX_MAX_KEYS];
384 : int keep_current_best;
385 :
3874 tgl 386 587057 : Assert(!GistPageIsLeaf(p));
387 :
6508 teodor 388 587057 : gistDeCompressAtt(giststate, r,
389 : it, NULL, (OffsetNumber) 0,
390 : identry, isnull);
391 :
392 : /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
3874 tgl 393 587057 : result = FirstOffsetNumber;
394 :
395 : /*
396 : * The index may have multiple columns, and there's a penalty value for
397 : * each column. The penalty associated with a column that appears earlier
398 : * in the index definition is strictly more important than the penalty of
399 : * a column that appears later in the index definition.
400 : *
401 : * best_penalty[j] is the best penalty we have seen so far for column j,
402 : * or -1 when we haven't yet examined column j. Array entries to the
403 : * right of the first -1 are undefined.
404 : */
405 587057 : best_penalty[0] = -1;
406 :
407 : /*
408 : * If we find a tuple that's exactly as good as the currently best one, we
409 : * could use either one. When inserting a lot of tuples with the same or
410 : * similar keys, it's preferable to descend down the same path when
411 : * possible, as that's more cache-friendly. On the other hand, if all
412 : * inserts land on the same leaf page after a split, we're never going to
413 : * insert anything to the other half of the split, and will end up using
414 : * only 50% of the available space. Distributing the inserts evenly would
415 : * lead to better space usage, but that hurts cache-locality during
416 : * insertion. To get the best of both worlds, when we find a tuple that's
417 : * exactly as good as the previous best, choose randomly whether to stick
418 : * to the old best, or use the new one. Once we decide to stick to the
419 : * old best, we keep sticking to it for any subsequent equally good tuples
420 : * we might find. This favors tuples with low offsets, but still allows
421 : * some inserts to go to other equally-good subtrees.
422 : *
423 : * keep_current_best is -1 if we haven't yet had to make a random choice
424 : * whether to keep the current best tuple. If we have done so, and
425 : * decided to keep it, keep_current_best is 1; if we've decided to
426 : * replace, keep_current_best is 0. (This state will be reset to -1 as
427 : * soon as we've made the replacement, but sometimes we make the choice in
428 : * advance of actually finding a replacement best tuple.)
429 : */
3726 heikki.linnakangas 430 587057 : keep_current_best = -1;
431 :
432 : /*
433 : * Loop over tuples on page.
434 : */
3874 tgl 435 587057 : maxoff = PageGetMaxOffsetNumber(p);
436 587057 : Assert(maxoff >= FirstOffsetNumber);
437 :
438 19408958 : for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
439 : {
6508 teodor 440 18935988 : IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
441 : bool zero_penalty;
442 : int j;
443 :
3874 tgl 444 18935988 : zero_penalty = true;
445 :
446 : /* Loop over index attributes. */
1491 akorotkov 447 38424335 : for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++)
448 : {
449 : Datum datum;
450 : float usize;
451 : bool IsNull;
452 :
453 : /* Compute penalty for this column. */
454 22620600 : datum = index_getattr(itup, j + 1, giststate->leafTupdesc,
455 : &IsNull);
6508 teodor 456 22620600 : gistdentryinit(giststate, j, &entry, datum, r, p, i,
457 : false, IsNull);
6164 458 22620600 : usize = gistpenalty(giststate, j, &entry, IsNull,
459 22620600 : &identry[j], isnull[j]);
3874 tgl 460 22620600 : if (usize > 0)
461 22389054 : zero_penalty = false;
462 :
463 22620600 : if (best_penalty[j] < 0 || usize < best_penalty[j])
464 : {
465 : /*
466 : * New best penalty for column. Tentatively select this tuple
467 : * as the target, and record the best penalty. Then reset the
468 : * next column's penalty to "unknown" (and indirectly, the
469 : * same for all the ones to its right). This will force us to
470 : * adopt this tuple's penalty values as the best for all the
471 : * remaining columns during subsequent loop iterations.
472 : */
473 18175866 : result = i;
474 18175866 : best_penalty[j] = usize;
475 :
1491 akorotkov 476 18175866 : if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1)
3874 tgl 477 3682957 : best_penalty[j + 1] = -1;
478 :
479 : /* we have new best, so reset keep-it decision */
3726 heikki.linnakangas 480 18175866 : keep_current_best = -1;
481 : }
3874 tgl 482 4444734 : else if (best_penalty[j] == usize)
483 : {
484 : /*
485 : * The current tuple is exactly as good for this column as the
486 : * best tuple seen so far. The next iteration of this loop
487 : * will compare the next column.
488 : */
489 : }
490 : else
491 : {
492 : /*
493 : * The current tuple is worse for this column than the best
494 : * tuple seen so far. Skip the remaining columns and move on
495 : * to the next tuple, if any.
496 : */
497 3132253 : zero_penalty = false; /* so outer loop won't exit */
6508 teodor 498 3132253 : break;
499 : }
500 : }
501 :
502 : /*
503 : * If we looped past the last column, and did not update "result",
504 : * then this tuple is exactly as good as the prior best tuple.
505 : */
1491 akorotkov 506 18935988 : if (j == IndexRelationGetNumberOfKeyAttributes(r) && result != i)
507 : {
3726 heikki.linnakangas 508 1310826 : if (keep_current_best == -1)
509 : {
510 : /* we didn't make the random choice yet for this old best */
497 tgl 511 127482 : keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
512 : }
3726 heikki.linnakangas 513 1310826 : if (keep_current_best == 0)
514 : {
515 : /* we choose to use the new tuple */
516 110789 : result = i;
517 : /* choose again if there are even more exactly-as-good ones */
518 110789 : keep_current_best = -1;
519 : }
520 : }
521 :
522 : /*
523 : * If we find a tuple with zero penalty for all columns, and we've
524 : * decided we don't want to search for another tuple with equal
525 : * penalty, there's no need to examine remaining tuples; just break
526 : * out of the loop and return it.
527 : */
3874 tgl 528 18935988 : if (zero_penalty)
529 : {
3726 heikki.linnakangas 530 227635 : if (keep_current_best == -1)
531 : {
532 : /* we didn't make the random choice yet for this old best */
497 tgl 533 227635 : keep_current_best = pg_prng_bool(&pg_global_prng_state) ? 1 : 0;
534 : }
3726 heikki.linnakangas 535 227635 : if (keep_current_best == 1)
536 114087 : break;
537 : }
538 : }
539 :
3874 tgl 540 587057 : return result;
541 : }
542 :
543 : /*
544 : * initialize a GiST entry with a decompressed version of key
545 : */
546 : void
6508 teodor 547 27081214 : gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
548 : Datum k, Relation r, Page pg, OffsetNumber o,
549 : bool l, bool isNull)
550 : {
6129 551 27081214 : if (!isNull)
552 : {
553 : GISTENTRY *dep;
554 :
555 26950197 : gistentryinit(*e, k, r, pg, o, l);
556 :
557 : /* there may not be a decompress function in opclass */
2028 tgl 558 26950197 : if (!OidIsValid(giststate->decompressFn[nkey].fn_oid))
559 24106107 : return;
560 :
561 : dep = (GISTENTRY *)
4370 562 2844090 : DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey],
563 : giststate->supportCollation[nkey],
564 : PointerGetDatum(e)));
565 : /* decompressFn may just return the given pointer */
6508 teodor 566 2844090 : if (dep != e)
567 239865 : gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset,
568 : dep->leafkey);
569 : }
570 : else
6129 571 131017 : gistentryinit(*e, (Datum) 0, r, pg, o, l);
572 : }
573 :
574 : IndexTuple
6508 575 875129 : gistFormTuple(GISTSTATE *giststate, Relation r,
576 : Datum *attdata, bool *isnull, bool isleaf)
577 : {
578 : Datum compatt[INDEX_MAX_KEYS];
579 : IndexTuple res;
580 :
934 heikki.linnakangas 581 875129 : gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
582 :
583 875129 : res = index_form_tuple(isleaf ? giststate->leafTupdesc :
584 : giststate->nonLeafTupdesc,
585 : compatt, isnull);
586 :
587 : /*
588 : * The offset number on tuples on internal pages is unused. For historical
589 : * reasons, it is set to 0xffff.
590 : */
591 875129 : ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
592 875129 : return res;
593 : }
594 :
595 : void
596 972201 : gistCompressValues(GISTSTATE *giststate, Relation r,
597 : Datum *attdata, bool *isnull, bool isleaf, Datum *compatt)
598 : {
599 : int i;
600 :
601 : /*
602 : * Call the compress method on each attribute.
603 : */
1491 akorotkov 604 2101212 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
605 : {
6508 teodor 606 1129011 : if (isnull[i])
607 7460 : compatt[i] = (Datum) 0;
608 : else
609 : {
610 : GISTENTRY centry;
611 : GISTENTRY *cep;
612 :
2936 heikki.linnakangas 613 1121551 : gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
614 : isleaf);
615 : /* there may not be a compress function in opclass */
2028 tgl 616 1121551 : if (OidIsValid(giststate->compressFn[i].fn_oid))
617 : cep = (GISTENTRY *)
618 876922 : DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[i],
619 : giststate->supportCollation[i],
620 : PointerGetDatum(¢ry)));
621 : else
622 244629 : cep = ¢ry;
2936 heikki.linnakangas 623 1121551 : compatt[i] = cep->key;
624 : }
625 : }
626 :
1491 akorotkov 627 972201 : if (isleaf)
628 : {
629 : /*
630 : * Emplace each included attribute if any.
631 : */
632 714923 : for (; i < r->rd_att->natts; i++)
633 : {
634 144570 : if (isnull[i])
1491 akorotkov 635 UBC 0 : compatt[i] = (Datum) 0;
636 : else
1491 akorotkov 637 CBC 144570 : compatt[i] = attdata[i];
638 : }
639 : }
6508 teodor 640 972201 : }
641 :
642 : /*
643 : * initialize a GiST entry with fetched value in key field
644 : */
645 : static Datum
2936 heikki.linnakangas 646 52714 : gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r)
647 : {
648 : GISTENTRY fentry;
649 : GISTENTRY *fep;
650 :
651 52714 : gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false);
652 :
653 : fep = (GISTENTRY *)
654 52714 : DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey],
655 : giststate->supportCollation[nkey],
656 : PointerGetDatum(&fentry)));
657 :
658 : /* fetchFn set 'key', return it to the caller */
659 52714 : return fep->key;
660 : }
661 :
662 : /*
663 : * Fetch all keys in tuple.
664 : * Returns a new HeapTuple containing the originally-indexed data.
665 : */
666 : HeapTuple
667 265102 : gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple)
668 : {
669 265102 : MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt);
670 : Datum fetchatt[INDEX_MAX_KEYS];
671 : bool isnull[INDEX_MAX_KEYS];
672 : int i;
673 :
1491 akorotkov 674 560379 : for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
675 : {
676 : Datum datum;
677 :
678 295277 : datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]);
679 :
2936 heikki.linnakangas 680 295277 : if (giststate->fetchFn[i].fn_oid != InvalidOid)
681 : {
682 52720 : if (!isnull[i])
683 52714 : fetchatt[i] = gistFetchAtt(giststate, i, datum, r);
684 : else
685 6 : fetchatt[i] = (Datum) 0;
686 : }
2028 tgl 687 242557 : else if (giststate->compressFn[i].fn_oid == InvalidOid)
688 : {
689 : /*
690 : * If opclass does not provide compress method that could change
691 : * original value, att is necessarily stored in original form.
692 : */
693 212382 : if (!isnull[i])
694 210714 : fetchatt[i] = datum;
695 : else
696 1668 : fetchatt[i] = (Datum) 0;
697 : }
698 : else
699 : {
700 : /*
701 : * Index-only scans not supported for this column. Since the
702 : * planner chose an index-only scan anyway, it is not interested
703 : * in this column, and we can replace it with a NULL.
704 : */
2936 heikki.linnakangas 705 30175 : isnull[i] = true;
706 30175 : fetchatt[i] = (Datum) 0;
707 : }
708 : }
709 :
710 : /*
711 : * Get each included attribute.
712 : */
1491 akorotkov 713 265102 : for (; i < r->rd_att->natts; i++)
714 : {
1491 akorotkov 715 UBC 0 : fetchatt[i] = index_getattr(tuple, i + 1, giststate->leafTupdesc,
716 : &isnull[i]);
717 : }
2936 heikki.linnakangas 718 CBC 265102 : MemoryContextSwitchTo(oldcxt);
719 :
2232 tgl 720 265102 : return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull);
721 : }
722 :
723 : float
6508 teodor 724 22773547 : gistpenalty(GISTSTATE *giststate, int attno,
725 : GISTENTRY *orig, bool isNullOrig,
726 : GISTENTRY *add, bool isNullAdd)
727 : {
6031 bruce 728 22773547 : float penalty = 0.0;
729 :
2062 peter_e 730 22773547 : if (giststate->penaltyFn[attno].fn_strict == false ||
731 22773547 : (isNullOrig == false && isNullAdd == false))
732 : {
4370 tgl 733 22628879 : FunctionCall3Coll(&giststate->penaltyFn[attno],
734 : giststate->supportCollation[attno],
735 : PointerGetDatum(orig),
736 : PointerGetDatum(add),
737 : PointerGetDatum(&penalty));
738 : /* disallow negative or NaN penalty */
4331 739 22628879 : if (isnan(penalty) || penalty < 0.0)
740 2279 : penalty = 0.0;
741 : }
6031 bruce 742 144668 : else if (isNullOrig && isNullAdd)
6164 teodor 743 8441 : penalty = 0.0;
744 : else
745 : {
746 : /* try to prevent mixing null and non-null values */
4151 tgl 747 136227 : penalty = get_float4_infinity();
748 : }
749 :
6164 teodor 750 22773547 : return penalty;
751 : }
752 :
753 : /*
754 : * Initialize a new index page
755 : */
756 : void
934 heikki.linnakangas 757 14947 : gistinitpage(Page page, uint32 f)
758 : {
759 : GISTPageOpaque opaque;
760 :
653 peter 761 14947 : PageInit(page, BLCKSZ, sizeof(GISTPageOpaqueData));
762 :
6363 tgl 763 14947 : opaque = GistPageGetOpaque(page);
5844 764 14947 : opaque->rightlink = InvalidBlockNumber;
765 14947 : opaque->flags = f;
766 14947 : opaque->gist_page_id = GIST_PAGE_ID;
6363 767 14947 : }
768 :
769 : /*
770 : * Initialize a new index buffer
771 : */
772 : void
934 heikki.linnakangas 773 13645 : GISTInitBuffer(Buffer b, uint32 f)
774 : {
775 : Page page;
776 :
777 13645 : page = BufferGetPage(b);
778 13645 : gistinitpage(page, f);
779 13645 : }
780 :
781 : /*
782 : * Verify that a freshly-read page looks sane.
783 : */
784 : void
6363 tgl 785 1054897 : gistcheckpage(Relation rel, Buffer buf)
786 : {
2545 kgrittn 787 1054897 : Page page = BufferGetPage(buf);
788 :
789 : /*
790 : * ReadBuffer verifies that every newly-read page passes
791 : * PageHeaderIsValid, which means it either contains a reasonably sane
792 : * page header or is all-zero. We have to defend against the all-zero
793 : * case, however.
794 : */
6363 tgl 795 1054897 : if (PageIsNew(page))
6363 tgl 796 UBC 0 : ereport(ERROR,
797 : (errcode(ERRCODE_INDEX_CORRUPTED),
798 : errmsg("index \"%s\" contains unexpected zero page at block %u",
799 : RelationGetRelationName(rel),
800 : BufferGetBlockNumber(buf)),
801 : errhint("Please REINDEX it.")));
802 :
803 : /*
804 : * Additionally check that the special area looks sane.
805 : */
5383 tgl 806 CBC 1054897 : if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData)))
6363 tgl 807 UBC 0 : ereport(ERROR,
808 : (errcode(ERRCODE_INDEX_CORRUPTED),
809 : errmsg("index \"%s\" contains corrupted page at block %u",
810 : RelationGetRelationName(rel),
811 : BufferGetBlockNumber(buf)),
812 : errhint("Please REINDEX it.")));
6363 tgl 813 CBC 1054897 : }
814 :
815 :
816 : /*
817 : * Allocate a new page (either by recycling, or by extending the index file)
818 : *
819 : * The returned buffer is already pinned and exclusive-locked
820 : *
821 : * Caller is responsible for initializing the page by calling GISTInitBuffer
822 : */
823 : Buffer
8 andres 824 GNC 12741 : gistNewBuffer(Relation r, Relation heaprel)
825 : {
826 : Buffer buffer;
827 :
828 : /* First, try to get a page from FSM */
6363 tgl 829 EUB : for (;;)
6408 bruce 830 LBC 0 : {
5304 heikki.linnakangas 831 GIC 12741 : BlockNumber blkno = GetFreeIndexPage(r);
6408 bruce 832 ECB :
6495 teodor 833 CBC 12741 : if (blkno == InvalidBlockNumber)
6363 tgl 834 GIC 12741 : break; /* nothing left in FSM */
6502 teodor 835 EUB :
6495 teodor 836 UIC 0 : buffer = ReadBuffer(r, blkno);
837 :
838 : /*
839 : * We have to guard against the possibility that someone else already
840 : * recycled this page; the buffer may be locked if so.
6363 tgl 841 EUB : */
6408 bruce 842 UIC 0 : if (ConditionalLockBuffer(buffer))
6408 bruce 843 EUB : {
2545 kgrittn 844 UIC 0 : Page page = BufferGetPage(buffer);
845 :
846 : /*
847 : * If the page was never initialized, it's OK to use.
1479 heikki.linnakangas 848 EUB : */
6363 tgl 849 UBC 0 : if (PageIsNew(page))
1479 heikki.linnakangas 850 UIC 0 : return buffer;
6363 tgl 851 EUB :
6363 tgl 852 UIC 0 : gistcheckpage(r, buffer);
853 :
854 : /*
855 : * Otherwise, recycle it if deleted, and too old to have any
856 : * processes interested in it.
1479 heikki.linnakangas 857 EUB : */
1479 heikki.linnakangas 858 UIC 0 : if (gistPageRecyclable(page))
859 : {
860 : /*
861 : * If we are generating WAL for Hot Standby then create a WAL
862 : * record that will allow us to conflict with queries running
863 : * on standby, in case they have snapshots older than the
864 : * page's deleteXid.
1479 heikki.linnakangas 865 EUB : */
1479 heikki.linnakangas 866 UBC 0 : if (XLogStandbyInfoActive() && RelationNeedsWAL(r))
8 andres 867 UNC 0 : gistXLogPageReuse(r, heaprel, blkno, GistPageGetDeleteXid(page));
1479 heikki.linnakangas 868 EUB :
1479 heikki.linnakangas 869 UIC 0 : return buffer;
870 : }
6363 tgl 871 EUB :
6363 tgl 872 UIC 0 : LockBuffer(buffer, GIST_UNLOCK);
873 : }
874 :
6363 tgl 875 EUB : /* Can't use it, so release buffer and try again */
6408 bruce 876 UIC 0 : ReleaseBuffer(buffer);
877 : }
878 :
6363 tgl 879 ECB : /* Must extend the file */
4 andres 880 GNC 12741 : buffer = ExtendBufferedRel(EB_REL(r), MAIN_FORKNUM, NULL,
881 : EB_LOCK_FIRST);
6408 bruce 882 EUB :
6502 teodor 883 CBC 12741 : return buffer;
884 : }
885 :
886 : /* Can this page be recycled yet? */
887 : bool
1479 heikki.linnakangas 888 GIC 1331 : gistPageRecyclable(Page page)
889 : {
1355 890 1331 : if (PageIsNew(page))
1355 heikki.linnakangas 891 UIC 0 : return true;
1355 heikki.linnakangas 892 GIC 1331 : if (GistPageIsDeleted(page))
893 : {
894 : /*
1355 heikki.linnakangas 895 EUB : * The page was deleted, but when? If it was just deleted, a scan
896 : * might have seen the downlink to it, and will read the page later.
897 : * As long as that can happen, we must keep the deleted page around as
898 : * a tombstone.
1355 heikki.linnakangas 899 ECB : *
900 : * For that check if the deletion XID could still be visible to
901 : * anyone. If not, then no scan that's still in progress could have
902 : * seen its downlink, and we can recycle it.
903 : */
1355 heikki.linnakangas 904 UIC 0 : FullTransactionId deletexid_full = GistPageGetDeleteXid(page);
905 :
791 pg 906 0 : return GlobalVisCheckRemovableFullXid(NULL, deletexid_full);
907 : }
1355 heikki.linnakangas 908 GIC 1331 : return false;
909 : }
1479 heikki.linnakangas 910 ECB :
911 : bytea *
2639 tgl 912 GIC 77 : gistoptions(Datum reloptions, bool validate)
913 : {
914 : static const relopt_parse_elt tab[] = {
915 : {"fillfactor", RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
916 : {"buffering", RELOPT_TYPE_ENUM, offsetof(GiSTOptions, buffering_mode)}
917 : };
918 :
1251 michael 919 77 : return (bytea *) build_reloptions(reloptions, validate,
920 : RELOPT_KIND_GIST,
921 : sizeof(GiSTOptions),
922 : tab, lengthof(tab));
923 : }
4527 heikki.linnakangas 924 ECB :
925 : /*
926 : * gistproperty() -- Check boolean properties of indexes.
927 : *
928 : * This is optional for most AMs, but is required for GiST because the core
929 : * property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle
930 : * AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn.
931 : */
932 : bool
2430 tgl 933 GIC 234 : gistproperty(Oid index_oid, int attno,
2430 tgl 934 ECB : IndexAMProperty prop, const char *propname,
935 : bool *res, bool *isnull)
936 : {
937 : Oid opclass,
938 : opfamily,
939 : opcintype;
940 : int16 procno;
941 :
942 : /* Only answer column-level inquiries */
2430 tgl 943 GIC 234 : if (attno == 0)
944 147 : return false;
945 :
946 : /*
947 : * Currently, GiST distance-ordered scans require that there be a distance
2430 tgl 948 ECB : * function in the opclass with the default types (i.e. the one loaded
949 : * into the relcache entry, see initGISTstate). So we assume that if such
950 : * a function exists, then there's a reason for it (rather than grubbing
951 : * through all the opfamily's operators to find an ordered one).
952 : *
953 : * Essentially the same code can test whether we support returning the
954 : * column data, since that's true if the opclass provides a fetch proc.
955 : */
956 :
2430 tgl 957 CBC 87 : switch (prop)
958 : {
2430 tgl 959 GIC 6 : case AMPROP_DISTANCE_ORDERABLE:
960 6 : procno = GIST_DISTANCE_PROC;
2430 tgl 961 CBC 6 : break;
962 6 : case AMPROP_RETURNABLE:
2430 tgl 963 GIC 6 : procno = GIST_FETCH_PROC;
2430 tgl 964 GBC 6 : break;
965 75 : default:
2430 tgl 966 GIC 75 : return false;
967 : }
968 :
2430 tgl 969 ECB : /* First we need to know the column's opclass. */
1663 akorotkov 970 GIC 12 : opclass = get_index_column_opclass(index_oid, attno);
1663 akorotkov 971 GBC 12 : if (!OidIsValid(opclass))
2430 tgl 972 EUB : {
2430 tgl 973 UIC 0 : *isnull = true;
974 0 : return true;
975 : }
976 :
2430 tgl 977 ECB : /* Now look up the opclass family and input datatype. */
1663 akorotkov 978 GIC 12 : if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
979 : {
2430 tgl 980 UIC 0 : *isnull = true;
981 0 : return true;
982 : }
983 :
984 : /* And now we can check whether the function is provided. */
985 :
2430 tgl 986 GIC 12 : *res = SearchSysCacheExists4(AMPROCNUM,
2430 tgl 987 ECB : ObjectIdGetDatum(opfamily),
988 : ObjectIdGetDatum(opcintype),
989 : ObjectIdGetDatum(opcintype),
990 : Int16GetDatum(procno));
991 :
992 : /*
993 : * Special case: even without a fetch function, AMPROP_RETURNABLE is true
994 : * if the opclass has no compress function.
995 : */
2028 tgl 996 CBC 12 : if (prop == AMPROP_RETURNABLE && !*res)
997 : {
998 6 : *res = !SearchSysCacheExists4(AMPROCNUM,
999 : ObjectIdGetDatum(opfamily),
1000 : ObjectIdGetDatum(opcintype),
1001 : ObjectIdGetDatum(opcintype),
1002 : Int16GetDatum(GIST_COMPRESS_PROC));
1003 : }
1004 :
1663 akorotkov 1005 GIC 12 : *isnull = false;
1006 :
2430 tgl 1007 CBC 12 : return true;
1008 : }
2430 tgl 1009 ECB :
1010 : /*
1011 : * Some indexes are not WAL-logged, but we need LSNs to detect concurrent page
1012 : * splits anyway. This function provides a fake sequence of LSNs for that
1013 : * purpose.
1014 : */
1015 : XLogRecPtr
3709 heikki.linnakangas 1016 GIC 42 : gistGetFakeLSN(Relation rel)
4527 heikki.linnakangas 1017 ECB : {
3709 heikki.linnakangas 1018 GIC 42 : if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
3709 heikki.linnakangas 1019 ECB : {
1020 : /*
1021 : * Temporary relations are only accessible in our session, so a simple
1022 : * backend-local counter will do.
1023 : */
1024 : static XLogRecPtr counter = FirstNormalUnloggedLSN;
1025 :
3709 heikki.linnakangas 1026 GIC 9 : return counter++;
1027 : }
748 bruce 1028 GBC 33 : else if (RelationIsPermanent(rel))
1029 : {
1030 : /*
1100 noah 1031 EUB : * WAL-logging on this relation will start after commit, so its LSNs
1032 : * must be distinct numbers smaller than the LSN at the next commit.
1033 : * Emit a dummy WAL record if insert-LSN hasn't advanced after the
1034 : * last call.
1035 : */
1036 : static XLogRecPtr lastlsn = InvalidXLogRecPtr;
1100 noah 1037 UBC 0 : XLogRecPtr currlsn = GetXLogInsertRecPtr();
1100 noah 1038 EUB :
1039 : /* Shouldn't be called for WAL-logging relations */
1100 noah 1040 UIC 0 : Assert(!RelationNeedsWAL(rel));
1041 :
1042 : /* No need for an actual record if we already have a distinct LSN */
1043 0 : if (!XLogRecPtrIsInvalid(lastlsn) && lastlsn == currlsn)
1044 0 : currlsn = gistXLogAssignLSN();
1045 :
1100 noah 1046 LBC 0 : lastlsn = currlsn;
1047 0 : return currlsn;
1048 : }
1049 : else
1050 : {
1051 : /*
1052 : * Unlogged relations are accessible from other backends, and survive
1053 : * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
1054 : */
3709 heikki.linnakangas 1055 GIC 33 : Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
1056 33 : return GetFakeLSNForUnloggedRel();
1057 : }
1058 : }
|