Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgutils.c
4 : * various support functions for SP-GiST
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * IDENTIFICATION
11 : * src/backend/access/spgist/spgutils.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 :
16 : #include "postgres.h"
17 :
18 : #include "access/amvalidate.h"
19 : #include "access/htup_details.h"
20 : #include "access/reloptions.h"
21 : #include "access/spgist_private.h"
22 : #include "access/toast_compression.h"
23 : #include "access/transam.h"
24 : #include "access/xact.h"
25 : #include "catalog/pg_amop.h"
26 : #include "commands/vacuum.h"
27 : #include "nodes/nodeFuncs.h"
28 : #include "parser/parse_coerce.h"
29 : #include "storage/bufmgr.h"
30 : #include "storage/indexfsm.h"
31 : #include "storage/lmgr.h"
32 : #include "utils/builtins.h"
33 : #include "utils/catcache.h"
34 : #include "utils/index_selfuncs.h"
35 : #include "utils/lsyscache.h"
36 : #include "utils/syscache.h"
37 :
38 :
39 : /*
40 : * SP-GiST handler function: return IndexAmRoutine with access method parameters
41 : * and callbacks.
42 : */
43 : Datum
2639 tgl 44 CBC 610 : spghandler(PG_FUNCTION_ARGS)
45 : {
46 610 : IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
47 :
48 610 : amroutine->amstrategies = 0;
2537 teodor 49 610 : amroutine->amsupport = SPGISTNProc;
1105 akorotkov 50 610 : amroutine->amoptsprocnum = SPGIST_OPTIONS_PROC;
2639 tgl 51 610 : amroutine->amcanorder = false;
1663 akorotkov 52 610 : amroutine->amcanorderbyop = true;
2639 tgl 53 610 : amroutine->amcanbackward = false;
54 610 : amroutine->amcanunique = false;
55 610 : amroutine->amcanmulticol = false;
56 610 : amroutine->amoptionalkey = true;
57 610 : amroutine->amsearcharray = false;
58 610 : amroutine->amsearchnulls = true;
735 59 610 : amroutine->amstorage = true;
2639 60 610 : amroutine->amclusterable = false;
61 610 : amroutine->ampredlocks = false;
2244 rhaas 62 610 : amroutine->amcanparallel = false;
734 tgl 63 610 : amroutine->amcaninclude = true;
1180 akapila 64 610 : amroutine->amusemaintenanceworkmem = false;
20 tomas.vondra 65 GNC 610 : amroutine->amsummarizing = false;
1180 akapila 66 CBC 610 : amroutine->amparallelvacuumoptions =
1180 akapila 67 ECB : VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP;
2639 tgl 68 GIC 610 : amroutine->amkeytype = InvalidOid;
2639 tgl 69 ECB :
2639 tgl 70 GIC 610 : amroutine->ambuild = spgbuild;
2639 tgl 71 CBC 610 : amroutine->ambuildempty = spgbuildempty;
72 610 : amroutine->aminsert = spginsert;
73 610 : amroutine->ambulkdelete = spgbulkdelete;
74 610 : amroutine->amvacuumcleanup = spgvacuumcleanup;
75 610 : amroutine->amcanreturn = spgcanreturn;
76 610 : amroutine->amcostestimate = spgcostestimate;
77 610 : amroutine->amoptions = spgoptions;
1663 akorotkov 78 610 : amroutine->amproperty = spgproperty;
1468 alvherre 79 610 : amroutine->ambuildphasename = NULL;
2639 tgl 80 610 : amroutine->amvalidate = spgvalidate;
981 81 610 : amroutine->amadjustmembers = spgadjustmembers;
2639 82 610 : amroutine->ambeginscan = spgbeginscan;
83 610 : amroutine->amrescan = spgrescan;
84 610 : amroutine->amgettuple = spggettuple;
85 610 : amroutine->amgetbitmap = spggetbitmap;
86 610 : amroutine->amendscan = spgendscan;
87 610 : amroutine->ammarkpos = NULL;
88 610 : amroutine->amrestrpos = NULL;
2266 rhaas 89 610 : amroutine->amestimateparallelscan = NULL;
90 610 : amroutine->aminitparallelscan = NULL;
91 610 : amroutine->amparallelrescan = NULL;
2639 tgl 92 ECB :
2639 tgl 93 GIC 610 : PG_RETURN_POINTER(amroutine);
2639 tgl 94 ECB : }
95 :
96 : /*
97 : * GetIndexInputType
98 : * Determine the nominal input data type for an index column
99 : *
100 : * We define the "nominal" input type as the associated opclass's opcintype,
101 : * or if that is a polymorphic type, the base type of the heap column or
102 : * expression that is the index's input. The reason for preferring the
103 : * opcintype is that non-polymorphic opclasses probably don't want to hear
104 : * about binary-compatible input types. For instance, if a text opclass
105 : * is being used with a varchar heap column, we want to report "text" not
106 : * "varchar". Likewise, opclasses don't want to hear about domain types,
107 : * so if we do consult the actual input type, we make sure to flatten domains.
108 : *
109 : * At some point maybe this should go somewhere else, but it's not clear
110 : * if any other index AMs have a use for it.
111 : */
112 : static Oid
735 tgl 113 GIC 198 : GetIndexInputType(Relation index, AttrNumber indexcol)
735 tgl 114 ECB : {
115 : Oid opcintype;
116 : AttrNumber heapcol;
117 : List *indexprs;
118 : ListCell *indexpr_item;
119 :
735 tgl 120 GIC 198 : Assert(index->rd_index != NULL);
735 tgl 121 CBC 198 : Assert(indexcol > 0 && indexcol <= index->rd_index->indnkeyatts);
122 198 : opcintype = index->rd_opcintype[indexcol - 1];
123 198 : if (!IsPolymorphicType(opcintype))
124 146 : return opcintype;
125 52 : heapcol = index->rd_index->indkey.values[indexcol - 1];
126 52 : if (heapcol != 0) /* Simple index column? */
127 46 : return getBaseType(get_atttype(index->rd_index->indrelid, heapcol));
735 tgl 128 ECB :
129 : /*
130 : * If the index expressions are already cached, skip calling
131 : * RelationGetIndexExpressions, as it will make a copy which is overkill.
132 : * We're not going to modify the trees, and we're not going to do anything
133 : * that would invalidate the relcache entry before we're done.
134 : */
735 tgl 135 GIC 6 : if (index->rd_indexprs)
735 tgl 136 LBC 0 : indexprs = index->rd_indexprs;
735 tgl 137 EUB : else
735 tgl 138 GIC 6 : indexprs = RelationGetIndexExpressions(index);
735 tgl 139 CBC 6 : indexpr_item = list_head(indexprs);
140 6 : for (int i = 1; i <= index->rd_index->indnkeyatts; i++)
735 tgl 141 ECB : {
735 tgl 142 GIC 6 : if (index->rd_index->indkey.values[i - 1] == 0)
735 tgl 143 ECB : {
144 : /* expression column */
735 tgl 145 GIC 6 : if (indexpr_item == NULL)
735 tgl 146 LBC 0 : elog(ERROR, "wrong number of index expressions");
735 tgl 147 GBC 6 : if (i == indexcol)
735 tgl 148 CBC 6 : return getBaseType(exprType((Node *) lfirst(indexpr_item)));
735 tgl 149 LBC 0 : indexpr_item = lnext(indexprs, indexpr_item);
735 tgl 150 EUB : }
151 : }
735 tgl 152 UIC 0 : elog(ERROR, "wrong number of index expressions");
735 tgl 153 EUB : return InvalidOid; /* keep compiler quiet */
154 : }
155 :
156 : /* Fill in a SpGistTypeDesc struct with info about the specified data type */
157 : static void
4131 tgl 158 GIC 609 : fillTypeDesc(SpGistTypeDesc *desc, Oid type)
4131 tgl 159 ECB : {
160 : HeapTuple tp;
161 : Form_pg_type typtup;
162 :
4131 tgl 163 GIC 609 : desc->type = type;
734 tgl 164 CBC 609 : tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type));
165 609 : if (!HeapTupleIsValid(tp))
734 tgl 166 LBC 0 : elog(ERROR, "cache lookup failed for type %u", type);
734 tgl 167 GBC 609 : typtup = (Form_pg_type) GETSTRUCT(tp);
734 tgl 168 CBC 609 : desc->attlen = typtup->typlen;
169 609 : desc->attbyval = typtup->typbyval;
170 609 : desc->attalign = typtup->typalign;
686 171 609 : desc->attstorage = typtup->typstorage;
734 172 609 : ReleaseSysCache(tp);
4131 173 609 : }
4131 tgl 174 ECB :
175 : /*
176 : * Fetch local cache of AM-specific info about the index, initializing it
177 : * if necessary
178 : */
179 : SpGistCache *
4129 tgl 180 GIC 1345085 : spgGetCache(Relation index)
4129 tgl 181 ECB : {
182 : SpGistCache *cache;
183 :
4129 tgl 184 GIC 1345085 : if (index->rd_amcache == NULL)
4129 tgl 185 ECB : {
186 : Oid atttype;
187 : spgConfigIn in;
188 : FmgrInfo *procinfo;
189 : Buffer metabuffer;
190 : SpGistMetaPageData *metadata;
191 :
4129 tgl 192 GIC 198 : cache = MemoryContextAllocZero(index->rd_indexcxt,
4129 tgl 193 ECB : sizeof(SpGistCache));
194 :
195 : /* SPGiST must have one key column and can also have INCLUDE columns */
734 tgl 196 GIC 198 : Assert(IndexRelationGetNumberOfKeyAttributes(index) == 1);
734 tgl 197 CBC 198 : Assert(IndexRelationGetNumberOfAttributes(index) <= INDEX_MAX_KEYS);
4129 tgl 198 ECB :
199 : /*
200 : * Get the actual (well, nominal) data type of the key column. We
201 : * pass this to the opclass config function so that polymorphic
202 : * opclasses are possible.
203 : */
734 tgl 204 GIC 198 : atttype = GetIndexInputType(index, spgKeyColumn + 1);
4129 tgl 205 ECB :
206 : /* Call the config function to get config info for the opclass */
4129 tgl 207 GIC 198 : in.attType = atttype;
4129 tgl 208 ECB :
4129 tgl 209 GIC 198 : procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC);
4129 tgl 210 CBC 198 : FunctionCall2Coll(procinfo,
734 211 198 : index->rd_indcollation[spgKeyColumn],
4129 tgl 212 ECB : PointerGetDatum(&in),
4129 tgl 213 GIC 198 : PointerGetDatum(&cache->config));
4129 tgl 214 ECB :
215 : /*
216 : * If leafType isn't specified, use the declared index column type,
217 : * which index.c will have derived from the opclass's opcintype.
218 : * (Although we now make spgvalidate.c warn if these aren't the same,
219 : * old user-defined opclasses may not set the STORAGE parameter
220 : * correctly, so believe leafType if it's given.)
221 : */
735 tgl 222 GIC 198 : if (!OidIsValid(cache->config.leafType))
505 tgl 223 ECB : {
735 tgl 224 GIC 183 : cache->config.leafType =
734 tgl 225 CBC 183 : TupleDescAttr(RelationGetDescr(index), spgKeyColumn)->atttypid;
735 tgl 226 ECB :
227 : /*
228 : * If index column type is binary-coercible to atttype (for
229 : * example, it's a domain over atttype), treat it as plain atttype
230 : * to avoid thinking we need to compress.
231 : */
505 tgl 232 GIC 190 : if (cache->config.leafType != atttype &&
505 tgl 233 CBC 7 : IsBinaryCoercible(cache->config.leafType, atttype))
234 7 : cache->config.leafType = atttype;
505 tgl 235 ECB : }
236 :
237 : /* Get the information we need about each relevant datatype */
4129 tgl 238 GIC 198 : fillTypeDesc(&cache->attType, atttype);
1934 teodor 239 ECB :
735 tgl 240 GIC 198 : if (cache->config.leafType != atttype)
1934 teodor 241 ECB : {
1934 teodor 242 GIC 15 : if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
1934 teodor 243 LBC 0 : ereport(ERROR,
1934 teodor 244 EUB : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
245 : errmsg("compress method must be defined when leaf type is different from input type")));
246 :
1934 teodor 247 GIC 15 : fillTypeDesc(&cache->attLeafType, cache->config.leafType);
1934 teodor 248 ECB : }
249 : else
250 : {
251 : /* Save lookups in this common case */
1934 teodor 252 GIC 183 : cache->attLeafType = cache->attType;
1934 teodor 253 ECB : }
254 :
4129 tgl 255 GIC 198 : fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
4129 tgl 256 CBC 198 : fillTypeDesc(&cache->attLabelType, cache->config.labelType);
4129 tgl 257 ECB :
258 : /* Last, get the lastUsedPages data from the metapage */
4129 tgl 259 GIC 198 : metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
4129 tgl 260 CBC 198 : LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
4129 tgl 261 ECB :
2545 kgrittn 262 GIC 198 : metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));
4129 tgl 263 ECB :
4129 tgl 264 GIC 198 : if (metadata->magicNumber != SPGIST_MAGIC_NUMBER)
4129 tgl 265 LBC 0 : elog(ERROR, "index \"%s\" is not an SP-GiST index",
4129 tgl 266 EUB : RelationGetRelationName(index));
267 :
4129 tgl 268 GIC 198 : cache->lastUsedPages = metadata->lastUsedPages;
4129 tgl 269 ECB :
4129 tgl 270 GIC 198 : UnlockReleaseBuffer(metabuffer);
4129 tgl 271 ECB :
4129 tgl 272 GIC 198 : index->rd_amcache = (void *) cache;
4129 tgl 273 ECB : }
274 : else
275 : {
276 : /* assume it's up to date */
4129 tgl 277 GIC 1344887 : cache = (SpGistCache *) index->rd_amcache;
4129 tgl 278 ECB : }
279 :
4129 tgl 280 GIC 1345085 : return cache;
4129 tgl 281 ECB : }
282 :
283 : /*
284 : * Compute a tuple descriptor for leaf tuples or index-only-scan result tuples.
285 : *
286 : * We can use the relcache's tupdesc as-is in many cases, and it's always
287 : * OK so far as any INCLUDE columns are concerned. However, the entry for
288 : * the key column has to match leafType in the first case or attType in the
289 : * second case. While the relcache's tupdesc *should* show leafType, this
290 : * might not hold for legacy user-defined opclasses, since before v14 they
291 : * were not allowed to declare their true storage type in CREATE OPCLASS.
292 : * Also, attType can be different from what is in the relcache.
293 : *
294 : * This function gives back either a pointer to the relcache's tupdesc
295 : * if that is suitable, or a palloc'd copy that's been adjusted to match
296 : * the specified key column type. We can avoid doing any catalog lookups
297 : * here by insisting that the caller pass an SpGistTypeDesc not just an OID.
298 : */
299 : TupleDesc
734 tgl 300 GIC 122865 : getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType)
734 tgl 301 ECB : {
302 : TupleDesc outTupDesc;
303 : Form_pg_attribute att;
304 :
734 tgl 305 GIC 122865 : if (keyType->type ==
734 tgl 306 CBC 122865 : TupleDescAttr(RelationGetDescr(index), spgKeyColumn)->atttypid)
307 122800 : outTupDesc = RelationGetDescr(index);
734 tgl 308 ECB : else
309 : {
734 tgl 310 GIC 65 : outTupDesc = CreateTupleDescCopy(RelationGetDescr(index));
734 tgl 311 CBC 65 : att = TupleDescAttr(outTupDesc, spgKeyColumn);
734 tgl 312 ECB : /* It's sufficient to update the type-dependent fields of the column */
734 tgl 313 GIC 65 : att->atttypid = keyType->type;
734 tgl 314 CBC 65 : att->atttypmod = -1;
315 65 : att->attlen = keyType->attlen;
316 65 : att->attbyval = keyType->attbyval;
317 65 : att->attalign = keyType->attalign;
318 65 : att->attstorage = keyType->attstorage;
734 tgl 319 ECB : /* We shouldn't need to bother with making these valid: */
734 tgl 320 GIC 65 : att->attcompression = InvalidCompressionMethod;
686 tgl 321 CBC 65 : att->attcollation = InvalidOid;
734 tgl 322 ECB : /* In case we changed typlen, we'd better reset following offsets */
734 tgl 323 GIC 73 : for (int i = spgFirstIncludeColumn; i < outTupDesc->natts; i++)
734 tgl 324 CBC 8 : TupleDescAttr(outTupDesc, i)->attcacheoff = -1;
734 tgl 325 ECB : }
734 tgl 326 GIC 122865 : return outTupDesc;
734 tgl 327 ECB : }
328 :
329 : /* Initialize SpGistState for working with the given index */
330 : void
4131 tgl 331 GIC 122413 : initSpGistState(SpGistState *state, Relation index)
4131 tgl 332 ECB : {
333 : SpGistCache *cache;
334 :
734 tgl 335 GIC 122413 : state->index = index;
734 tgl 336 ECB :
337 : /* Get cached static information about index */
4129 tgl 338 GIC 122413 : cache = spgGetCache(index);
4129 tgl 339 ECB :
4129 tgl 340 GIC 122413 : state->config = cache->config;
4129 tgl 341 CBC 122413 : state->attType = cache->attType;
1934 teodor 342 122413 : state->attLeafType = cache->attLeafType;
4129 tgl 343 122413 : state->attPrefixType = cache->attPrefixType;
344 122413 : state->attLabelType = cache->attLabelType;
4131 tgl 345 ECB :
346 : /* Ensure we have a valid descriptor for leaf tuples */
734 tgl 347 GIC 122413 : state->leafTupDesc = getSpGistTupleDesc(state->index, &state->attLeafType);
734 tgl 348 ECB :
349 : /* Make workspace for constructing dead tuples */
4131 tgl 350 GIC 122413 : state->deadTupleStorage = palloc0(SGDTSIZE);
4131 tgl 351 ECB :
352 : /* Set XID to use in redirection tuples */
4131 tgl 353 GIC 122413 : state->myXid = GetTopTransactionIdIfAny();
4131 tgl 354 ECB :
355 : /* Assume we're not in an index build (spgbuild will override) */
4131 tgl 356 GIC 122413 : state->isBuild = false;
4131 tgl 357 CBC 122413 : }
4131 tgl 358 ECB :
359 : /*
360 : * Allocate a new page (either by recycling, or by extending the index file).
361 : *
362 : * The returned buffer is already pinned and exclusive-locked.
363 : * Caller is responsible for initializing the page by calling SpGistInitBuffer.
364 : */
365 : Buffer
4131 tgl 366 GIC 3263 : SpGistNewBuffer(Relation index)
4131 tgl 367 ECB : {
368 : Buffer buffer;
369 :
370 : /* First, try to get a page from FSM */
371 : for (;;)
4131 tgl 372 UBC 0 : {
4131 tgl 373 CBC 3263 : BlockNumber blkno = GetFreeIndexPage(index);
374 :
375 3263 : if (blkno == InvalidBlockNumber)
376 3259 : break; /* nothing known to FSM */
377 :
378 : /*
379 : * The fixed pages shouldn't ever be listed in FSM, but just in case
380 : * one is, ignore it.
381 : */
4046 382 4 : if (SpGistBlockIsFixed(blkno))
4131 tgl 383 UBC 0 : continue;
384 :
4131 tgl 385 CBC 4 : buffer = ReadBuffer(index, blkno);
386 :
387 : /*
388 : * We have to guard against the possibility that someone else already
389 : * recycled this page; the buffer may be locked if so.
390 : */
391 4 : if (ConditionalLockBuffer(buffer))
392 : {
2545 kgrittn 393 4 : Page page = BufferGetPage(buffer);
394 :
4131 tgl 395 4 : if (PageIsNew(page))
396 1 : return buffer; /* OK to use, if never initialized */
397 :
398 3 : if (SpGistPageIsDeleted(page) || PageIsEmpty(page))
399 3 : return buffer; /* OK to use */
400 :
4131 tgl 401 UBC 0 : LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
402 : }
403 :
404 : /* Can't use it, so release buffer and try again */
405 0 : ReleaseBuffer(buffer);
406 : }
407 :
4 andres 408 GNC 3259 : buffer = ExtendBufferedRel(EB_REL(index), MAIN_FORKNUM, NULL,
409 : EB_LOCK_FIRST);
410 :
4131 tgl 411 GIC 3259 : return buffer;
412 : }
413 :
4131 tgl 414 ECB : /*
415 : * Update index metapage's lastUsedPages info from local cache, if possible
416 : *
417 : * Updating meta page isn't critical for index working, so
418 : * 1 use ConditionalLockBuffer to improve concurrency
419 : * 2 don't WAL-log metabuffer changes to decrease WAL traffic
420 : */
421 : void
4131 tgl 422 CBC 121363 : SpGistUpdateMetaPage(Relation index)
423 : {
424 121363 : SpGistCache *cache = (SpGistCache *) index->rd_amcache;
425 :
426 121363 : if (cache != NULL)
4131 tgl 427 ECB : {
428 : Buffer metabuffer;
429 :
4131 tgl 430 GIC 121363 : metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
431 :
432 121363 : if (ConditionalLockBuffer(metabuffer))
433 : {
1984 434 121363 : Page metapage = BufferGetPage(metabuffer);
435 121363 : SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage);
436 :
4129 437 121363 : metadata->lastUsedPages = cache->lastUsedPages;
438 :
1984 tgl 439 ECB : /*
440 : * Set pd_lower just past the end of the metadata. This is
441 : * essential, because without doing so, metadata will be lost if
442 : * xlog.c compresses the page. (We must do this here because
443 : * pre-v11 versions of PG did not set the metapage's pd_lower
444 : * correctly, so a pg_upgraded index might contain the wrong
445 : * value.)
446 : */
1984 tgl 447 GBC 121363 : ((PageHeader) metapage)->pd_lower =
1984 tgl 448 GIC 121363 : ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) metapage;
449 :
4131 tgl 450 CBC 121363 : MarkBufferDirty(metabuffer);
4131 tgl 451 GIC 121363 : UnlockReleaseBuffer(metabuffer);
452 : }
453 : else
454 : {
4131 tgl 455 UIC 0 : ReleaseBuffer(metabuffer);
456 : }
457 : }
4131 tgl 458 GIC 121363 : }
459 :
/*
 * GET_LUP(c, f): address of the lastUsedPages cache entry of cache "c" that
 * corresponds to flags "f".  Reducing the flags modulo SPGIST_CACHED_PAGES
 * is just for paranoia's sake.
 */
#define GET_LUP(c, f) \
	(&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES])
463 :
464 : /*
465 : * Allocate and initialize a new buffer of the type and parity specified by
466 : * flags. The returned buffer is already pinned and exclusive-locked.
467 : *
468 : * When requesting an inner page, if we get one with the wrong parity,
469 : * we just release the buffer and try again. We will get a different page
470 : * because GetFreeIndexPage will have marked the page used in FSM. The page
471 : * is entered in our local lastUsedPages cache, so there's some hope of
472 : * making use of it later in this session, but otherwise we rely on VACUUM
473 : * to eventually re-enter the page in FSM, making it available for recycling.
474 : * Note that such a page does not get marked dirty here, so unless it's used
475 : * fairly soon, the buffer will just get discarded and the page will remain
476 : * as it was on disk.
4131 tgl 477 ECB : *
478 : * When we return a buffer to the caller, the page is *not* entered into
479 : * the lastUsedPages cache; we expect the caller will do so after it's taken
3260 bruce 480 : * whatever space it will use. This is because after the caller has used up
481 : * some space, the page might have less space than whatever was cached already
4131 tgl 482 : * so we'd rather not trash the old cache entry.
483 : */
484 : static Buffer
4131 tgl 485 GBC 2920 : allocNewBuffer(Relation index, int flags)
486 : {
4131 tgl 487 GIC 2920 : SpGistCache *cache = spgGetCache(index);
4046 tgl 488 CBC 2920 : uint16 pageflags = 0;
489 :
4046 tgl 490 GIC 2920 : if (GBUF_REQ_LEAF(flags))
4046 tgl 491 CBC 2867 : pageflags |= SPGIST_LEAF;
492 2920 : if (GBUF_REQ_NULLS(flags))
4046 tgl 493 UIC 0 : pageflags |= SPGIST_NULLS;
4131 tgl 494 ECB :
495 : for (;;)
4131 tgl 496 GIC 43 : {
4131 tgl 497 ECB : Buffer buffer;
498 :
4131 tgl 499 GIC 2963 : buffer = SpGistNewBuffer(index);
4046 500 2963 : SpGistInitBuffer(buffer, pageflags);
4131 tgl 501 ECB :
4046 tgl 502 CBC 2963 : if (pageflags & SPGIST_LEAF)
503 : {
4131 tgl 504 ECB : /* Leaf pages have no parity concerns, so just use it */
4131 tgl 505 GIC 2867 : return buffer;
506 : }
4131 tgl 507 ECB : else
508 : {
4131 tgl 509 GIC 96 : BlockNumber blkno = BufferGetBlockNumber(buffer);
3955 bruce 510 96 : int blkFlags = GBUF_INNER_PARITY(blkno);
511 :
4046 tgl 512 CBC 96 : if ((flags & GBUF_PARITY_MASK) == blkFlags)
4131 tgl 513 EUB : {
4131 tgl 514 ECB : /* Page has right parity, use it */
4131 tgl 515 CBC 53 : return buffer;
4131 tgl 516 ECB : }
517 : else
518 : {
519 : /* Page has wrong parity, record it in cache and try again */
4046 tgl 520 GIC 43 : if (pageflags & SPGIST_NULLS)
4046 tgl 521 UIC 0 : blkFlags |= GBUF_NULLS;
4046 tgl 522 GIC 43 : cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno;
523 43 : cache->lastUsedPages.cachedPage[blkFlags].freeSpace =
2545 kgrittn 524 43 : PageGetExactFreeSpace(BufferGetPage(buffer));
4131 tgl 525 43 : UnlockReleaseBuffer(buffer);
526 : }
527 : }
528 : }
529 : }
530 :
531 : /*
532 : * Get a buffer of the type and parity specified by flags, having at least
4131 tgl 533 ECB : * as much free space as indicated by needSpace. We use the lastUsedPages
534 : * cache to assign the same buffer previously requested when possible.
535 : * The returned buffer is already pinned and exclusive-locked.
536 : *
537 : * *isNew is set true if the page was initialized here, false if it was
538 : * already valid.
539 : */
4131 tgl 540 EUB : Buffer
4131 tgl 541 GIC 5420 : SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
542 : {
543 5420 : SpGistCache *cache = spgGetCache(index);
544 : SpGistLastUsedPage *lup;
545 :
546 : /* Bail out if even an empty page wouldn't meet the demand */
547 5420 : if (needSpace > SPGIST_PAGE_CAPACITY)
4131 tgl 548 UIC 0 : elog(ERROR, "desired SPGiST tuple size is too big");
4131 tgl 549 ECB :
550 : /*
551 : * If possible, increase the space request to include relation's
552 : * fillfactor. This ensures that when we add unrelated tuples to a page,
553 : * we try to keep 100-fillfactor% available for adding tuples that are
554 : * related to the ones already on it. But fillfactor mustn't cause an
555 : * error for requests that would otherwise be legal.
556 : */
1231 michael 557 GIC 5420 : needSpace += SpGistGetTargetPageFreeSpace(index);
4131 tgl 558 CBC 5420 : needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);
4131 tgl 559 ECB :
560 : /* Get the cache entry for this flags setting */
4131 tgl 561 GIC 5420 : lup = GET_LUP(cache, flags);
562 :
4131 tgl 563 ECB : /* If we have nothing cached, just turn it over to allocNewBuffer */
4131 tgl 564 GIC 5420 : if (lup->blkno == InvalidBlockNumber)
565 : {
4131 tgl 566 CBC 91 : *isNew = true;
4131 tgl 567 GIC 91 : return allocNewBuffer(index, flags);
568 : }
569 :
570 : /* fixed pages should never be in cache */
4046 tgl 571 CBC 5329 : Assert(!SpGistBlockIsFixed(lup->blkno));
572 :
4131 tgl 573 ECB : /* If cached freeSpace isn't enough, don't bother looking at the page */
4131 tgl 574 GIC 5329 : if (lup->freeSpace >= needSpace)
575 : {
576 : Buffer buffer;
577 : Page page;
4131 tgl 578 EUB :
4131 tgl 579 GBC 2500 : buffer = ReadBuffer(index, lup->blkno);
4131 tgl 580 EUB :
4131 tgl 581 GIC 2500 : if (!ConditionalLockBuffer(buffer))
582 : {
4131 tgl 583 ECB : /*
584 : * buffer is locked by another process, so return a new buffer
585 : */
4131 tgl 586 UIC 0 : ReleaseBuffer(buffer);
587 0 : *isNew = true;
4131 tgl 588 LBC 0 : return allocNewBuffer(index, flags);
589 : }
4131 tgl 590 ECB :
2545 kgrittn 591 CBC 2500 : page = BufferGetPage(buffer);
4131 tgl 592 ECB :
4131 tgl 593 GBC 2500 : if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page))
4131 tgl 594 ECB : {
595 : /* OK to initialize the page */
4046 tgl 596 CBC 93 : uint16 pageflags = 0;
4046 tgl 597 ECB :
4046 tgl 598 GIC 93 : if (GBUF_REQ_LEAF(flags))
599 90 : pageflags |= SPGIST_LEAF;
600 93 : if (GBUF_REQ_NULLS(flags))
4046 tgl 601 UIC 0 : pageflags |= SPGIST_NULLS;
4046 tgl 602 GIC 93 : SpGistInitBuffer(buffer, pageflags);
4131 603 93 : lup->freeSpace = PageGetExactFreeSpace(page) - needSpace;
4131 tgl 604 CBC 93 : *isNew = true;
605 93 : return buffer;
606 : }
4131 tgl 607 ECB :
608 : /*
609 : * Check that page is of right type and has enough space. We must
610 : * recheck this since our cache isn't necessarily up to date.
611 : */
4046 tgl 612 CBC 4814 : if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) &&
613 2407 : (GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page)))
4131 tgl 614 ECB : {
4131 tgl 615 GIC 2407 : int freeSpace = PageGetExactFreeSpace(page);
616 :
617 2407 : if (freeSpace >= needSpace)
618 : {
619 : /* Success, update freespace info and return the buffer */
620 2407 : lup->freeSpace = freeSpace - needSpace;
4131 tgl 621 GBC 2407 : *isNew = false;
4131 tgl 622 GIC 2407 : return buffer;
623 : }
624 : }
4131 tgl 625 ECB :
626 : /*
627 : * fallback to allocation of new buffer
628 : */
4131 tgl 629 UIC 0 : UnlockReleaseBuffer(buffer);
630 : }
631 :
632 : /* No success with cache, so return a new buffer */
4131 tgl 633 GIC 2829 : *isNew = true;
634 2829 : return allocNewBuffer(index, flags);
635 : }
636 :
4131 tgl 637 ECB : /*
638 : * Update lastUsedPages cache when done modifying a page.
639 : *
640 : * We update the appropriate cache entry if it already contained this page
641 : * (its freeSpace is likely obsolete), or if this page has more space than
642 : * whatever we had cached.
643 : */
644 : void
4131 tgl 645 GIC 1213425 : SpGistSetLastUsedPage(Relation index, Buffer buffer)
646 : {
4131 tgl 647 CBC 1213425 : SpGistCache *cache = spgGetCache(index);
4131 tgl 648 ECB : SpGistLastUsedPage *lup;
649 : int freeSpace;
2545 kgrittn 650 CBC 1213425 : Page page = BufferGetPage(buffer);
4131 tgl 651 1213425 : BlockNumber blkno = BufferGetBlockNumber(buffer);
652 : int flags;
4131 tgl 653 ECB :
4046 654 : /* Never enter fixed pages (root pages) in cache, though */
4046 tgl 655 GBC 1213425 : if (SpGistBlockIsFixed(blkno))
4131 tgl 656 GIC 403184 : return;
4131 tgl 657 ECB :
4131 tgl 658 GIC 810241 : if (SpGistPageIsLeaf(page))
4131 tgl 659 CBC 418014 : flags = GBUF_LEAF;
4131 tgl 660 ECB : else
4131 tgl 661 CBC 392227 : flags = GBUF_INNER_PARITY(blkno);
4046 tgl 662 GIC 810241 : if (SpGistPageStoresNulls(page))
4046 tgl 663 LBC 0 : flags |= GBUF_NULLS;
4131 tgl 664 ECB :
4131 tgl 665 GIC 810241 : lup = GET_LUP(cache, flags);
666 :
667 810241 : freeSpace = PageGetExactFreeSpace(page);
668 810241 : if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno ||
669 227267 : lup->freeSpace < freeSpace)
670 : {
671 587322 : lup->blkno = blkno;
4131 tgl 672 CBC 587322 : lup->freeSpace = freeSpace;
673 : }
674 : }
675 :
4131 tgl 676 ECB : /*
677 : * Initialize an SPGiST page to empty, with specified flags
678 : */
679 : void
4131 tgl 680 CBC 3794 : SpGistInitPage(Page page, uint16 f)
681 : {
682 : SpGistPageOpaque opaque;
683 :
732 michael 684 GIC 3794 : PageInit(page, BLCKSZ, sizeof(SpGistPageOpaqueData));
4131 tgl 685 3794 : opaque = SpGistPageGetOpaque(page);
4131 tgl 686 CBC 3794 : opaque->flags = f;
4131 tgl 687 GIC 3794 : opaque->spgist_page_id = SPGIST_PAGE_ID;
4131 tgl 688 CBC 3794 : }
4131 tgl 689 ECB :
690 : /*
691 : * Initialize a buffer's page to empty, with specified flags
692 : */
693 : void
4131 tgl 694 GIC 3682 : SpGistInitBuffer(Buffer b, uint16 f)
695 : {
4131 tgl 696 CBC 3682 : Assert(BufferGetPageSize(b) == BLCKSZ);
2545 kgrittn 697 GIC 3682 : SpGistInitPage(BufferGetPage(b), f);
4131 tgl 698 3682 : }
699 :
700 : /*
4131 tgl 701 ECB : * Initialize metadata page
702 : */
703 : void
4131 tgl 704 CBC 104 : SpGistInitMetapage(Page page)
705 : {
706 : SpGistMetaPageData *metadata;
4046 tgl 707 ECB : int i;
4131 708 :
4131 tgl 709 GIC 104 : SpGistInitPage(page, SPGIST_META);
710 104 : metadata = SpGistPageGetMeta(page);
711 104 : memset(metadata, 0, sizeof(SpGistMetaPageData));
712 104 : metadata->magicNumber = SPGIST_MAGIC_NUMBER;
713 :
714 : /* initialize last-used-page cache to empty */
4046 tgl 715 CBC 936 : for (i = 0; i < SPGIST_CACHED_PAGES; i++)
716 832 : metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber;
1984 tgl 717 ECB :
718 : /*
719 : * Set pd_lower just past the end of the metadata. This is essential,
720 : * because without doing so, metadata will be lost if xlog.c compresses
721 : * the page.
722 : */
1984 tgl 723 CBC 104 : ((PageHeader) page)->pd_lower =
1984 tgl 724 GIC 104 : ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) page;
4131 725 104 : }
726 :
727 : /*
728 : * reloptions processing for SPGiST
4131 tgl 729 ECB : */
730 : bytea *
2639 tgl 731 GIC 50 : spgoptions(Datum reloptions, bool validate)
732 : {
733 : static const relopt_parse_elt tab[] = {
734 : {"fillfactor", RELOPT_TYPE_INT, offsetof(SpGistOptions, fillfactor)},
735 : };
736 :
1231 michael 737 50 : return (bytea *) build_reloptions(reloptions, validate,
738 : RELOPT_KIND_SPGIST,
739 : sizeof(SpGistOptions),
740 : tab, lengthof(tab));
741 : }
742 :
4131 tgl 743 ECB : /*
744 : * Get the space needed to store a non-null datum of the indicated type
745 : * in an inner tuple (that is, as a prefix or node label).
746 : * Note the result is already rounded up to a MAXALIGN boundary.
738 747 : * Here we follow the convention that pass-by-val types are just stored
748 : * in their Datum representation (compare memcpyInnerDatum).
4131 749 : */
750 : unsigned int
738 tgl 751 GIC 6163 : SpGistGetInnerTypeSize(SpGistTypeDesc *att, Datum datum)
4131 tgl 752 ECB : {
753 : unsigned int size;
754 :
4131 tgl 755 GIC 6163 : if (att->attbyval)
756 3240 : size = sizeof(Datum);
757 2923 : else if (att->attlen > 0)
758 1991 : size = att->attlen;
759 : else
760 932 : size = VARSIZE_ANY(datum);
4131 tgl 761 ECB :
4131 tgl 762 GIC 6163 : return MAXALIGN(size);
763 : }
764 :
738 tgl 765 ECB : /*
766 : * Copy the given non-null datum to *target, in the inner-tuple case
4131 767 : */
768 : static void
738 tgl 769 GIC 6163 : memcpyInnerDatum(void *target, SpGistTypeDesc *att, Datum datum)
770 : {
4131 tgl 771 ECB : unsigned int size;
772 :
4131 tgl 773 GIC 6163 : if (att->attbyval)
4131 tgl 774 ECB : {
4131 tgl 775 GIC 3240 : memcpy(target, &datum, sizeof(Datum));
776 : }
777 : else
778 : {
779 2923 : size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum);
780 2923 : memcpy(target, DatumGetPointer(datum), size);
781 : }
4131 tgl 782 CBC 6163 : }
783 :
784 : /*
785 : * Compute space required for a leaf tuple holding the given data.
786 : *
734 tgl 787 ECB : * This must match the size-calculation portion of spgFormLeafTuple.
738 788 : */
789 : Size
734 tgl 790 GIC 9734764 : SpGistGetLeafTupleSize(TupleDesc tupleDescriptor,
791 : Datum *datums, bool *isnulls)
792 : {
793 : Size size;
794 : Size data_size;
795 9734764 : bool needs_null_mask = false;
796 9734764 : int natts = tupleDescriptor->natts;
738 tgl 797 ECB :
798 : /*
734 799 : * Decide whether we need a nulls bitmask.
800 : *
801 : * If there is only a key attribute (natts == 1), never use a bitmask, for
802 : * compatibility with the pre-v14 layout of leaf tuples. Otherwise, we
803 : * need one if any attribute is null.
804 : */
734 tgl 805 GIC 9734764 : if (natts > 1)
806 : {
807 506508 : for (int i = 0; i < natts; i++)
808 : {
809 346367 : if (isnulls[i])
810 : {
811 8846 : needs_null_mask = true;
734 tgl 812 CBC 8846 : break;
813 : }
814 : }
815 : }
816 :
734 tgl 817 ECB : /*
818 : * Calculate size of the data part; same as for heap tuples.
819 : */
734 tgl 820 GIC 9734764 : data_size = heap_compute_data_size(tupleDescriptor, datums, isnulls);
821 :
822 : /*
823 : * Compute total size.
824 : */
734 tgl 825 CBC 9734764 : size = SGLTHDRSZ(needs_null_mask);
734 tgl 826 GBC 9734764 : size += data_size;
734 tgl 827 GIC 9734764 : size = MAXALIGN(size);
734 tgl 828 ECB :
829 : /*
830 : * Ensure that we can replace the tuple with a dead tuple later. This test
831 : * is unnecessary when there are any non-null attributes, but be safe.
832 : */
734 tgl 833 GIC 9734764 : if (size < SGDTSIZE)
734 tgl 834 UIC 0 : size = SGDTSIZE;
734 tgl 835 ECB :
734 tgl 836 GIC 9734764 : return size;
837 : }
838 :
4131 tgl 839 ECB : /*
840 : * Construct a leaf tuple containing the given heap TID and datum values
841 : */
842 : SpGistLeafTuple
4046 tgl 843 CBC 765280 : spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
734 tgl 844 ECB : Datum *datums, bool *isnulls)
845 : {
4131 846 : SpGistLeafTuple tup;
734 tgl 847 GIC 765280 : TupleDesc tupleDescriptor = state->leafTupDesc;
848 : Size size;
849 : Size hoff;
850 : Size data_size;
851 765280 : bool needs_null_mask = false;
852 765280 : int natts = tupleDescriptor->natts;
853 : char *tp; /* ptr to tuple data */
854 765280 : uint16 tupmask = 0; /* unused heap_fill_tuple output */
4131 tgl 855 ECB :
856 : /*
734 857 : * Decide whether we need a nulls bitmask.
858 : *
859 : * If there is only a key attribute (natts == 1), never use a bitmask, for
860 : * compatibility with the pre-v14 layout of leaf tuples. Otherwise, we
861 : * need one if any attribute is null.
862 : */
734 tgl 863 GIC 765280 : if (natts > 1)
864 : {
865 215459 : for (int i = 0; i < natts; i++)
866 : {
867 149583 : if (isnulls[i])
868 : {
869 5986 : needs_null_mask = true;
734 tgl 870 CBC 5986 : break;
871 : }
872 : }
873 : }
874 :
4131 tgl 875 ECB : /*
734 876 : * Calculate size of the data part; same as for heap tuples.
877 : */
734 tgl 878 GIC 765280 : data_size = heap_compute_data_size(tupleDescriptor, datums, isnulls);
879 :
880 : /*
881 : * Compute total size.
882 : */
734 tgl 883 CBC 765280 : hoff = SGLTHDRSZ(needs_null_mask);
734 tgl 884 GBC 765280 : size = hoff + data_size;
734 tgl 885 GIC 765280 : size = MAXALIGN(size);
886 :
734 tgl 887 ECB : /*
888 : * Ensure that we can replace the tuple with a dead tuple later. This test
889 : * is unnecessary when there are any non-null attributes, but be safe.
4131 890 : */
4131 tgl 891 CBC 765280 : if (size < SGDTSIZE)
4131 tgl 892 UIC 0 : size = SGDTSIZE;
4131 tgl 893 ECB :
894 : /* OK, form the tuple */
4131 tgl 895 CBC 765280 : tup = (SpGistLeafTuple) palloc0(size);
896 :
4131 tgl 897 GIC 765280 : tup->size = size;
734 898 765280 : SGLT_SET_NEXTOFFSET(tup, InvalidOffsetNumber);
4131 899 765280 : tup->heapPtr = *heapPtr;
734 tgl 900 ECB :
734 tgl 901 GIC 765280 : tp = (char *) tup + hoff;
734 tgl 902 ECB :
734 tgl 903 CBC 765280 : if (needs_null_mask)
904 : {
905 : bits8 *bp; /* ptr to null bitmap in tuple */
734 tgl 906 ECB :
907 : /* Set nullmask presence bit in SpGistLeafTuple header */
734 tgl 908 GIC 5986 : SGLT_SET_HASNULLMASK(tup, true);
734 tgl 909 ECB : /* Fill the data area and null mask */
734 tgl 910 GIC 5986 : bp = (bits8 *) ((char *) tup + sizeof(SpGistLeafTupleData));
911 5986 : heap_fill_tuple(tupleDescriptor, datums, isnulls, tp, data_size,
912 : &tupmask, bp);
913 : }
734 tgl 914 CBC 759294 : else if (natts > 1 || !isnulls[spgKeyColumn])
915 : {
916 : /* Fill data area only */
734 tgl 917 GIC 759258 : heap_fill_tuple(tupleDescriptor, datums, isnulls, tp, data_size,
918 : &tupmask, (bits8 *) NULL);
919 : }
920 : /* otherwise we have no data, nor a bitmap, to fill */
921 :
4131 922 765280 : return tup;
923 : }
4131 tgl 924 ECB :
925 : /*
926 : * Construct a node (to go into an inner tuple) containing the given label
927 : *
928 : * Note that the node's downlink is just set invalid here. Caller will fill
929 : * it in later.
930 : */
931 : SpGistNodeTuple
4131 tgl 932 CBC 20299 : spgFormNodeTuple(SpGistState *state, Datum label, bool isnull)
4131 tgl 933 ECB : {
934 : SpGistNodeTuple tup;
935 : unsigned int size;
4131 tgl 936 GIC 20299 : unsigned short infomask = 0;
937 :
938 : /* compute space needed (note result is already maxaligned) */
4131 tgl 939 CBC 20299 : size = SGNTHDRSZ;
4131 tgl 940 GBC 20299 : if (!isnull)
738 tgl 941 GIC 2895 : size += SpGistGetInnerTypeSize(&state->attLabelType, label);
942 :
943 : /*
944 : * Here we make sure that the size will fit in the field reserved for it
4131 tgl 945 ECB : * in t_info.
946 : */
4131 tgl 947 CBC 20299 : if ((size & INDEX_SIZE_MASK) != size)
4131 tgl 948 LBC 0 : ereport(ERROR,
949 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
3363 tgl 950 ECB : errmsg("index row requires %zu bytes, maximum size is %zu",
951 : (Size) size, (Size) INDEX_SIZE_MASK)));
952 :
4131 tgl 953 GIC 20299 : tup = (SpGistNodeTuple) palloc0(size);
4131 tgl 954 ECB :
4131 tgl 955 GIC 20299 : if (isnull)
4131 tgl 956 CBC 17404 : infomask |= INDEX_NULL_MASK;
4131 tgl 957 ECB : /* we don't bother setting the INDEX_VAR_MASK bit */
4131 tgl 958 GIC 20299 : infomask |= size;
4131 tgl 959 CBC 20299 : tup->t_info = infomask;
960 :
961 : /* The TID field will be filled in later */
4131 tgl 962 GIC 20299 : ItemPointerSetInvalid(&tup->t_tid);
963 :
964 20299 : if (!isnull)
738 965 2895 : memcpyInnerDatum(SGNTDATAPTR(tup), &state->attLabelType, label);
4131 tgl 966 ECB :
4131 tgl 967 GIC 20299 : return tup;
968 : }
969 :
970 : /*
971 : * Construct an inner tuple containing the given prefix and node array
972 : */
973 : SpGistInnerTuple
974 4265 : spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
975 : int nNodes, SpGistNodeTuple *nodes)
4131 tgl 976 ECB : {
977 : SpGistInnerTuple tup;
978 : unsigned int size;
979 : unsigned int prefixSize;
980 : int i;
981 : char *ptr;
982 :
983 : /* Compute size needed */
4131 tgl 984 CBC 4265 : if (hasPrefix)
738 985 3268 : prefixSize = SpGistGetInnerTypeSize(&state->attPrefixType, prefix);
986 : else
4131 tgl 987 GIC 997 : prefixSize = 0;
988 :
989 4265 : size = SGITHDRSZ + prefixSize;
990 :
4131 tgl 991 ECB : /* Note: we rely on node tuple sizes to be maxaligned already */
4131 tgl 992 GBC 29617 : for (i = 0; i < nNodes; i++)
4131 tgl 993 GIC 25352 : size += IndexTupleSize(nodes[i]);
994 :
995 : /*
996 : * Ensure that we can replace the tuple with a dead tuple later. This
4131 tgl 997 ECB : * test is unnecessary given current tuple layouts, but let's be safe.
4131 tgl 998 EUB : */
4131 tgl 999 GIC 4265 : if (size < SGDTSIZE)
4131 tgl 1000 UIC 0 : size = SGDTSIZE;
1001 :
1002 : /*
1003 : * Inner tuple should be small enough to fit on a page
1004 : */
4131 tgl 1005 GIC 4265 : if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))
4131 tgl 1006 UIC 0 : ereport(ERROR,
1007 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1008 : errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu",
3363 tgl 1009 ECB : (Size) size,
1010 : SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
1011 : errhint("Values larger than a buffer page cannot be indexed.")));
4131 tgl 1012 EUB :
1013 : /*
1014 : * Check for overflow of header fields --- probably can't fail if the
4131 tgl 1015 ECB : * above succeeded, but let's be paranoid
1016 : */
4131 tgl 1017 CBC 4265 : if (size > SGITMAXSIZE ||
1018 4265 : prefixSize > SGITMAXPREFIXSIZE ||
4131 tgl 1019 ECB : nNodes > SGITMAXNNODES)
4131 tgl 1020 UIC 0 : elog(ERROR, "SPGiST inner tuple header field is too small");
4131 tgl 1021 ECB :
1022 : /* OK, form the tuple */
4131 tgl 1023 GIC 4265 : tup = (SpGistInnerTuple) palloc0(size);
4131 tgl 1024 ECB :
4131 tgl 1025 GIC 4265 : tup->nNodes = nNodes;
4131 tgl 1026 CBC 4265 : tup->prefixSize = prefixSize;
4131 tgl 1027 GIC 4265 : tup->size = size;
4131 tgl 1028 ECB :
4131 tgl 1029 GIC 4265 : if (hasPrefix)
738 tgl 1030 CBC 3268 : memcpyInnerDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);
4131 tgl 1031 ECB :
4131 tgl 1032 GIC 4265 : ptr = (char *) SGITNODEPTR(tup);
1033 :
4131 tgl 1034 CBC 29617 : for (i = 0; i < nNodes; i++)
1035 : {
4131 tgl 1036 GIC 25352 : SpGistNodeTuple node = nodes[i];
1037 :
1038 25352 : memcpy(ptr, node, IndexTupleSize(node));
1039 25352 : ptr += IndexTupleSize(node);
1040 : }
1041 :
1042 4265 : return tup;
1043 : }
1044 :
1045 : /*
1046 : * Construct a "dead" tuple to replace a tuple being deleted.
1047 : *
1048 : * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER.
4131 tgl 1049 ECB : * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and
1050 : * the xid field is filled in automatically.
1051 : *
1052 : * This is called in critical sections, so we don't use palloc; the tuple
1053 : * is built in preallocated storage. It should be copied before another
1054 : * call with different parameters can occur.
1055 : */
1056 : SpGistDeadTuple
4131 tgl 1057 GIC 6491 : spgFormDeadTuple(SpGistState *state, int tupstate,
4131 tgl 1058 ECB : BlockNumber blkno, OffsetNumber offnum)
1059 : {
4131 tgl 1060 CBC 6491 : SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage;
4131 tgl 1061 ECB :
4131 tgl 1062 CBC 6491 : tuple->tupstate = tupstate;
4131 tgl 1063 GIC 6491 : tuple->size = SGDTSIZE;
734 1064 6491 : SGLT_SET_NEXTOFFSET(tuple, InvalidOffsetNumber);
1065 :
4131 tgl 1066 CBC 6491 : if (tupstate == SPGIST_REDIRECT)
4131 tgl 1067 ECB : {
4131 tgl 1068 GIC 1183 : ItemPointerSet(&tuple->pointer, blkno, offnum);
3902 1069 1183 : Assert(TransactionIdIsValid(state->myXid));
4131 tgl 1070 CBC 1183 : tuple->xid = state->myXid;
1071 : }
1072 : else
1073 : {
4131 tgl 1074 GIC 5308 : ItemPointerSetInvalid(&tuple->pointer);
1075 5308 : tuple->xid = InvalidTransactionId;
1076 : }
1077 :
1078 6491 : return tuple;
1079 : }
4131 tgl 1080 ECB :
1081 : /*
1082 : * Convert an SPGiST leaf tuple into Datum/isnull arrays.
734 1083 : *
1084 : * The caller must allocate sufficient storage for the output arrays.
1085 : * (INDEX_MAX_KEYS entries should be enough.)
1086 : */
1087 : void
734 tgl 1088 GIC 32262 : spgDeformLeafTuple(SpGistLeafTuple tup, TupleDesc tupleDescriptor,
1089 : Datum *datums, bool *isnulls, bool keyColumnIsNull)
1090 : {
1091 32262 : bool hasNullsMask = SGLT_GET_HASNULLMASK(tup);
1092 : char *tp; /* ptr to tuple data */
1093 : bits8 *bp; /* ptr to null bitmap in tuple */
1094 :
1095 32262 : if (keyColumnIsNull && tupleDescriptor->natts == 1)
1096 : {
1097 : /*
734 tgl 1098 EUB : * Trivial case: there is only the key attribute and we're in a nulls
1099 : * tree. The hasNullsMask bit in the tuple header should not be set
1100 : * (and thus we can't use index_deform_tuple_internal), but
1101 : * nonetheless the result is NULL.
1102 : *
1103 : * Note: currently this is dead code, because noplace calls this when
1104 : * there is only the key attribute. But we should cover the case.
734 tgl 1105 ECB : */
734 tgl 1106 LBC 0 : Assert(!hasNullsMask);
1107 :
1108 0 : datums[spgKeyColumn] = (Datum) 0;
734 tgl 1109 UIC 0 : isnulls[spgKeyColumn] = true;
1110 0 : return;
1111 : }
1112 :
734 tgl 1113 GIC 32262 : tp = (char *) tup + SGLTHDRSZ(hasNullsMask);
1114 32262 : bp = (bits8 *) ((char *) tup + sizeof(SpGistLeafTupleData));
1115 :
734 tgl 1116 CBC 32262 : index_deform_tuple_internal(tupleDescriptor,
1117 : datums, isnulls,
1118 : tp, bp, hasNullsMask);
1119 :
1120 : /*
1121 : * Key column isnull value from the tuple should be consistent with
1122 : * keyColumnIsNull flag from the caller.
1123 : */
734 tgl 1124 GIC 32262 : Assert(keyColumnIsNull == isnulls[spgKeyColumn]);
734 tgl 1125 ECB : }
1126 :
1127 : /*
1128 : * Extract the label datums of the nodes within innerTuple
1129 : *
1130 : * Returns NULL if label datums are NULLs
1131 : */
4131 1132 : Datum *
4131 tgl 1133 CBC 9345213 : spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
1134 : {
4131 tgl 1135 ECB : Datum *nodeLabels;
1136 : int i;
1137 : SpGistNodeTuple node;
4131 tgl 1138 EUB :
1139 : /* Either all the labels must be NULL, or none. */
3875 heikki.linnakangas 1140 GIC 9345213 : node = SGITNODEPTR(innerTuple);
3875 heikki.linnakangas 1141 CBC 9345213 : if (IndexTupleHasNulls(node))
1142 : {
3875 heikki.linnakangas 1143 GIC 50357870 : SGITITERATE(innerTuple, i, node)
1144 : {
3875 heikki.linnakangas 1145 CBC 41130387 : if (!IndexTupleHasNulls(node))
3875 heikki.linnakangas 1146 LBC 0 : elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
1147 : }
4131 tgl 1148 ECB : /* They're all null, so just return NULL */
4131 tgl 1149 GBC 9227483 : return NULL;
4131 tgl 1150 ECB : }
1151 : else
3875 heikki.linnakangas 1152 : {
3875 heikki.linnakangas 1153 GIC 117730 : nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
1154 1346773 : SGITITERATE(innerTuple, i, node)
1155 : {
1156 1229043 : if (IndexTupleHasNulls(node))
3875 heikki.linnakangas 1157 UIC 0 : elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
3875 heikki.linnakangas 1158 GIC 1229043 : nodeLabels[i] = SGNTDATUM(node, state);
1159 : }
1160 117730 : return nodeLabels;
1161 : }
1162 : }
1163 :
1164 : /*
1165 : * Add a new item to the page, replacing a PLACEHOLDER item if possible.
1166 : * Return the location it's inserted at, or InvalidOffsetNumber on failure.
1167 : *
4131 tgl 1168 ECB : * If startOffset isn't NULL, we start searching for placeholders at
1169 : * *startOffset, and update that to the next place to search. This is just
1170 : * an optimization for repeated insertions.
1171 : *
1172 : * If errorOK is false, we throw error when there's not enough room,
1173 : * rather than returning InvalidOffsetNumber.
1174 : */
1175 : OffsetNumber
4131 tgl 1176 CBC 809617 : SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
4131 tgl 1177 ECB : OffsetNumber *startOffset, bool errorOK)
1178 : {
4131 tgl 1179 GIC 809617 : SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
4131 tgl 1180 ECB : OffsetNumber i,
1181 : maxoff,
1182 : offnum;
1183 :
4131 tgl 1184 GIC 809617 : if (opaque->nPlaceholder > 0 &&
4131 tgl 1185 CBC 231664 : PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size))
4131 tgl 1186 ECB : {
1187 : /* Try to replace a placeholder */
4131 tgl 1188 CBC 231664 : maxoff = PageGetMaxOffsetNumber(page);
1189 231664 : offnum = InvalidOffsetNumber;
1190 :
4131 tgl 1191 ECB : for (;;)
1192 : {
4131 tgl 1193 GIC 231664 : if (startOffset && *startOffset != InvalidOffsetNumber)
4131 tgl 1194 CBC 57892 : i = *startOffset;
1195 : else
1196 173772 : i = FirstOffsetNumber;
1197 15894921 : for (; i <= maxoff; i++)
1198 : {
4131 tgl 1199 GIC 15894921 : SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page,
1200 : PageGetItemId(page, i));
1201 :
4131 tgl 1202 CBC 15894921 : if (it->tupstate == SPGIST_PLACEHOLDER)
4131 tgl 1203 ECB : {
4131 tgl 1204 GIC 231664 : offnum = i;
4131 tgl 1205 GBC 231664 : break;
1206 : }
1207 : }
4131 tgl 1208 EUB :
1209 : /* Done if we found a placeholder */
4131 tgl 1210 GIC 231664 : if (offnum != InvalidOffsetNumber)
1211 231664 : break;
1212 :
4131 tgl 1213 UBC 0 : if (startOffset && *startOffset != InvalidOffsetNumber)
4131 tgl 1214 EUB : {
1215 : /* Hint was no good, re-search from beginning */
4131 tgl 1216 UIC 0 : *startOffset = InvalidOffsetNumber;
4131 tgl 1217 LBC 0 : continue;
1218 : }
1219 :
4131 tgl 1220 ECB : /* Hmm, no placeholder found? */
4131 tgl 1221 UIC 0 : opaque->nPlaceholder = 0;
4131 tgl 1222 LBC 0 : break;
1223 : }
1224 :
4131 tgl 1225 GIC 231664 : if (offnum != InvalidOffsetNumber)
1226 : {
1227 : /* Replace the placeholder tuple */
1228 231664 : PageIndexTupleDelete(page, offnum);
1229 :
4131 tgl 1230 CBC 231664 : offnum = PageAddItem(page, item, size, offnum, false, false);
1231 :
4131 tgl 1232 ECB : /*
1233 : * We should not have failed given the size check at the top of
1234 : * the function, but test anyway. If we did fail, we must PANIC
1235 : * because we've already deleted the placeholder tuple, and
1236 : * there's no other way to keep the damage from getting to disk.
1237 : */
4131 tgl 1238 GBC 231664 : if (offnum != InvalidOffsetNumber)
1239 : {
4131 tgl 1240 GIC 231664 : Assert(opaque->nPlaceholder > 0);
4131 tgl 1241 CBC 231664 : opaque->nPlaceholder--;
4131 tgl 1242 GIC 231664 : if (startOffset)
1243 59227 : *startOffset = offnum + 1;
1244 : }
1245 : else
609 peter 1246 LBC 0 : elog(PANIC, "failed to add item of size %zu to SPGiST index page",
1247 : size);
1248 :
4131 tgl 1249 CBC 231664 : return offnum;
4131 tgl 1250 EUB : }
1251 : }
1252 :
4131 tgl 1253 ECB : /* No luck in replacing a placeholder, so just add it to the page */
4131 tgl 1254 GIC 577953 : offnum = PageAddItem(page, item, size,
1255 : InvalidOffsetNumber, false, false);
1256 :
1257 577953 : if (offnum == InvalidOffsetNumber && !errorOK)
609 peter 1258 UIC 0 : elog(ERROR, "failed to add item of size %zu to SPGiST index page",
1259 : size);
1260 :
4131 tgl 1261 GIC 577953 : return offnum;
1262 : }
1663 akorotkov 1263 ECB :
1264 : /*
1265 : * spgproperty() -- Check boolean properties of indexes.
1266 : *
1267 : * This is optional for most AMs, but is required for SP-GiST because the core
1268 : * property code doesn't support AMPROP_DISTANCE_ORDERABLE.
1269 : */
1270 : bool
1663 akorotkov 1271 GIC 93 : spgproperty(Oid index_oid, int attno,
1272 : IndexAMProperty prop, const char *propname,
1273 : bool *res, bool *isnull)
1663 akorotkov 1274 ECB : {
1275 : Oid opclass,
1276 : opfamily,
1277 : opcintype;
1278 : CatCList *catlist;
1279 : int i;
1280 :
1281 : /* Only answer column-level inquiries */
1663 akorotkov 1282 CBC 93 : if (attno == 0)
1663 akorotkov 1283 GIC 33 : return false;
1284 :
1285 60 : switch (prop)
1286 : {
1287 6 : case AMPROP_DISTANCE_ORDERABLE:
1288 6 : break;
1289 54 : default:
1290 54 : return false;
1291 : }
1663 akorotkov 1292 ECB :
1293 : /*
1294 : * Currently, SP-GiST distance-ordered scans require that there be a
1663 akorotkov 1295 EUB : * distance operator in the opclass with the default types. So we assume
96 michael 1296 : * that if such an operator exists, then there's a reason for it.
1297 : */
1298 :
1299 : /* First we need to know the column's opclass. */
1663 akorotkov 1300 CBC 6 : opclass = get_index_column_opclass(index_oid, attno);
1663 akorotkov 1301 GIC 6 : if (!OidIsValid(opclass))
1663 akorotkov 1302 EUB : {
1663 akorotkov 1303 UBC 0 : *isnull = true;
1663 akorotkov 1304 UIC 0 : return true;
1305 : }
1306 :
1663 akorotkov 1307 ECB : /* Now look up the opclass family and input datatype. */
1663 akorotkov 1308 GIC 6 : if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype))
1309 : {
1663 akorotkov 1310 LBC 0 : *isnull = true;
1663 akorotkov 1311 UIC 0 : return true;
1663 akorotkov 1312 ECB : }
1313 :
1314 : /* And now we can check whether the operator is provided. */
1663 akorotkov 1315 CBC 6 : catlist = SearchSysCacheList1(AMOPSTRATEGY,
1316 : ObjectIdGetDatum(opfamily));
1663 akorotkov 1317 ECB :
1663 akorotkov 1318 CBC 6 : *res = false;
1663 akorotkov 1319 ECB :
1663 akorotkov 1320 CBC 51 : for (i = 0; i < catlist->n_members; i++)
1321 : {
1663 akorotkov 1322 GIC 48 : HeapTuple amoptup = &catlist->members[i]->tuple;
1663 akorotkov 1323 CBC 48 : Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(amoptup);
1663 akorotkov 1324 ECB :
1663 akorotkov 1325 GIC 48 : if (amopform->amoppurpose == AMOP_ORDER &&
1326 3 : (amopform->amoplefttype == opcintype ||
1327 3 : amopform->amoprighttype == opcintype) &&
1663 akorotkov 1328 CBC 3 : opfamily_can_sort_type(amopform->amopsortfamily,
1329 : get_op_rettype(amopform->amopopr)))
1663 akorotkov 1330 ECB : {
1663 akorotkov 1331 GIC 3 : *res = true;
1663 akorotkov 1332 CBC 3 : break;
1333 : }
1334 : }
1335 :
1663 akorotkov 1336 GIC 6 : ReleaseSysCacheList(catlist);
1337 :
1338 6 : *isnull = false;
1339 :
1340 6 : return true;
1341 : }
|