Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * spgtextproc.c
4 : * implementation of radix tree (compressed trie) over text
5 : *
6 : * In a text_ops SPGiST index, inner tuples can have a prefix which is the
7 : * common prefix of all strings indexed under that tuple. The node labels
8 : * represent the next byte of the string(s) after the prefix. Assuming we
9 : * always use the longest possible prefix, we will get more than one node
10 : * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
11 : *
12 : * To reconstruct the indexed string for any index entry, concatenate the
13 : * inner-tuple prefixes and node labels starting at the root and working
14 : * down to the leaf entry, then append the datum in the leaf entry.
15 : * (While descending the tree, "level" is the number of bytes reconstructed
16 : * so far.)
17 : *
18 : * However, there are two special cases for node labels: -1 indicates that
19 : * there are no more bytes after the prefix-so-far, and -2 indicates that we
20 : * had to split an existing allTheSame tuple (in such a case we have to create
21 : * a node label that doesn't correspond to any string byte). In either case,
22 : * the node label does not contribute anything to the reconstructed string.
23 : *
24 : * Previously, we used a node label of zero for both special cases, but
25 : * this was problematic because one can't tell whether a string ending at
26 : * the current level can be pushed down into such a child node. For
27 : * backwards compatibility, we still support such node labels for reading;
28 : * but no new entries will ever be pushed down into a zero-labeled child.
29 : * No new entries ever get pushed into a -2-labeled child, either.
30 : *
31 : *
32 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
33 : * Portions Copyright (c) 1994, Regents of the University of California
34 : *
35 : * IDENTIFICATION
36 : * src/backend/access/spgist/spgtextproc.c
37 : *
38 : *-------------------------------------------------------------------------
39 : */
40 : #include "postgres.h"
41 :
42 : #include "access/spgist.h"
43 : #include "catalog/pg_type.h"
44 : #include "mb/pg_wchar.h"
45 : #include "utils/builtins.h"
46 : #include "utils/datum.h"
47 : #include "utils/pg_locale.h"
48 : #include "utils/varlena.h"
49 : #include "varatt.h"
50 :
51 :
52 : /*
53 : * In the worst case, an inner tuple in a text radix tree could have as many
54 : * as 258 nodes (one for each possible byte value, plus the two special
55 : * cases). Each node can take 16 bytes on MAXALIGN=8 machines. The inner
56 : * tuple must fit on an index page of size BLCKSZ. Rather than assuming we
57 : * know the exact amount of overhead imposed by page headers, tuple headers,
58 : * etc, we leave 100 bytes for that (the actual overhead should be no more
59 : * than 56 bytes at this writing, so there is slop in this number).
60 : * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
61 : * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
62 : * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
63 : * tuple size exceeds maximum" if there are too many distinct next-byte values
64 : * at a given place in the tree. Since use of nonstandard block sizes appears
65 : * to be negligible in the field, we just live with that fact for now,
66 : * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
67 : * than default.
68 : */
69 : #define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32)
70 :
71 : /*
72 : * Strategy for collation aware operator on text is equal to btree strategy
73 : * plus value of 10.
74 : *
75 : * Current collation aware strategies and their corresponding btree strategies:
76 : * 11 BTLessStrategyNumber
77 : * 12 BTLessEqualStrategyNumber
78 : * 14 BTGreaterEqualStrategyNumber
79 : * 15 BTGreaterStrategyNumber
80 : */
81 : #define SPG_STRATEGY_ADDITION (10)
82 : #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
83 : && (s) != RTPrefixStrategyNumber)
84 :
85 : /* Struct for sorting values in picksplit */
86 : typedef struct spgNodePtr
87 : {
88 : Datum d;
89 : int i;
90 : int16 c;
91 : } spgNodePtr;
92 :
93 :
94 : Datum
4131 tgl 95 GIC 32 : spg_text_config(PG_FUNCTION_ARGS)
4131 tgl 96 ECB : {
97 : /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
4131 tgl 98 GIC 32 : spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
4131 tgl 99 ECB :
4131 tgl 100 GIC 32 : cfg->prefixType = TEXTOID;
3226 tgl 101 CBC 32 : cfg->labelType = INT2OID;
4129 102 32 : cfg->canReturnData = true;
4131 103 32 : cfg->longValuesOK = true; /* suffixing will shorten long values */
104 32 : PG_RETURN_VOID();
4131 tgl 105 ECB : }
106 :
107 : /*
108 : * Form a text datum from the given not-necessarily-null-terminated string,
109 : * using short varlena header format if possible
110 : */
111 : static Datum
4131 tgl 112 GIC 130608 : formTextDatum(const char *data, int datalen)
4131 tgl 113 ECB : {
114 : char *p;
115 :
4131 tgl 116 GIC 130608 : p = (char *) palloc(datalen + VARHDRSZ);
4131 tgl 117 ECB :
4131 tgl 118 GIC 130608 : if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
4131 tgl 119 ECB : {
4131 tgl 120 GIC 130608 : SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
4131 tgl 121 CBC 130608 : if (datalen)
122 122789 : memcpy(p + VARHDRSZ_SHORT, data, datalen);
4131 tgl 123 ECB : }
124 : else
125 : {
4131 tgl 126 UIC 0 : SET_VARSIZE(p, datalen + VARHDRSZ);
4131 tgl 127 UBC 0 : memcpy(p + VARHDRSZ, data, datalen);
4131 tgl 128 EUB : }
129 :
4131 tgl 130 GIC 130608 : return PointerGetDatum(p);
4131 tgl 131 ECB : }
132 :
133 : /*
134 : * Find the length of the common prefix of a and b
135 : */
136 : static int
4131 tgl 137 GIC 48123 : commonPrefix(const char *a, const char *b, int lena, int lenb)
4131 tgl 138 ECB : {
4131 tgl 139 GIC 48123 : int i = 0;
4131 tgl 140 ECB :
4131 tgl 141 GIC 3379944 : while (i < lena && i < lenb && *a == *b)
4131 tgl 142 ECB : {
4131 tgl 143 GIC 3331821 : a++;
4131 tgl 144 CBC 3331821 : b++;
145 3331821 : i++;
4131 tgl 146 ECB : }
147 :
4131 tgl 148 GIC 48123 : return i;
4131 tgl 149 ECB : }
150 :
151 : /*
152 : * Binary search an array of int16 datums for a match to c
153 : *
154 : * On success, *i gets the match location; on failure, it gets where to insert
155 : */
156 : static bool
3226 tgl 157 GIC 105510 : searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
4131 tgl 158 ECB : {
4131 tgl 159 GIC 105510 : int StopLow = 0,
4131 tgl 160 CBC 105510 : StopHigh = nNodes;
4131 tgl 161 ECB :
4131 tgl 162 GIC 290633 : while (StopLow < StopHigh)
4131 tgl 163 ECB : {
4131 tgl 164 GIC 289958 : int StopMiddle = (StopLow + StopHigh) >> 1;
3226 tgl 165 CBC 289958 : int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
4131 tgl 166 ECB :
4131 tgl 167 GIC 289958 : if (c < middle)
4131 tgl 168 CBC 92221 : StopHigh = StopMiddle;
169 197737 : else if (c > middle)
170 92902 : StopLow = StopMiddle + 1;
4131 tgl 171 ECB : else
172 : {
4131 tgl 173 GIC 104835 : *i = StopMiddle;
4131 tgl 174 CBC 104835 : return true;
4131 tgl 175 ECB : }
176 : }
177 :
4131 tgl 178 GIC 675 : *i = StopHigh;
4131 tgl 179 CBC 675 : return false;
4131 tgl 180 ECB : }
181 :
182 : Datum
4131 tgl 183 GIC 105822 : spg_text_choose(PG_FUNCTION_ARGS)
4131 tgl 184 ECB : {
4131 tgl 185 GIC 105822 : spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
4131 tgl 186 CBC 105822 : spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
187 105822 : text *inText = DatumGetTextPP(in->datum);
188 105822 : char *inStr = VARDATA_ANY(inText);
189 105822 : int inSize = VARSIZE_ANY_EXHDR(inText);
3226 190 105822 : char *prefixStr = NULL;
191 105822 : int prefixSize = 0;
4131 192 105822 : int commonLen = 0;
3226 193 105822 : int16 nodeChar = 0;
194 105822 : int i = 0;
4131 tgl 195 ECB :
196 : /* Check for prefix match, set nodeChar to first byte after prefix */
4131 tgl 197 GIC 105822 : if (in->hasPrefix)
4131 tgl 198 ECB : {
4131 tgl 199 GIC 41912 : text *prefixText = DatumGetTextPP(in->prefixDatum);
3226 tgl 200 ECB :
3226 tgl 201 GIC 41912 : prefixStr = VARDATA_ANY(prefixText);
3226 tgl 202 CBC 41912 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
4131 tgl 203 ECB :
4131 tgl 204 GIC 41912 : commonLen = commonPrefix(inStr + in->level,
4131 tgl 205 ECB : prefixStr,
4131 tgl 206 GIC 41912 : inSize - in->level,
4131 tgl 207 ECB : prefixSize);
208 :
4131 tgl 209 GIC 41912 : if (commonLen == prefixSize)
4131 tgl 210 ECB : {
4131 tgl 211 GIC 41600 : if (inSize - in->level > commonLen)
3226 tgl 212 CBC 38522 : nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
4131 tgl 213 ECB : else
3226 tgl 214 GIC 3078 : nodeChar = -1;
4131 tgl 215 ECB : }
216 : else
217 : {
218 : /* Must split tuple because incoming value doesn't match prefix */
4131 tgl 219 GIC 312 : out->resultType = spgSplitTuple;
4131 tgl 220 ECB :
4131 tgl 221 GIC 312 : if (commonLen == 0)
4131 tgl 222 ECB : {
4131 tgl 223 GIC 9 : out->result.splitTuple.prefixHasPrefix = false;
4131 tgl 224 ECB : }
225 : else
226 : {
4131 tgl 227 GIC 303 : out->result.splitTuple.prefixHasPrefix = true;
4131 tgl 228 CBC 303 : out->result.splitTuple.prefixPrefixDatum =
229 303 : formTextDatum(prefixStr, commonLen);
4131 tgl 230 ECB : }
2420 tgl 231 GIC 312 : out->result.splitTuple.prefixNNodes = 1;
2420 tgl 232 CBC 312 : out->result.splitTuple.prefixNodeLabels =
233 312 : (Datum *) palloc(sizeof(Datum));
234 624 : out->result.splitTuple.prefixNodeLabels[0] =
3226 235 312 : Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
4131 tgl 236 ECB :
2420 tgl 237 GIC 312 : out->result.splitTuple.childNodeN = 0;
2420 tgl 238 ECB :
4131 tgl 239 GIC 312 : if (prefixSize - commonLen == 1)
4131 tgl 240 ECB : {
4131 tgl 241 GIC 306 : out->result.splitTuple.postfixHasPrefix = false;
4131 tgl 242 ECB : }
243 : else
244 : {
4131 tgl 245 GIC 6 : out->result.splitTuple.postfixHasPrefix = true;
4131 tgl 246 CBC 6 : out->result.splitTuple.postfixPrefixDatum =
247 6 : formTextDatum(prefixStr + commonLen + 1,
248 6 : prefixSize - commonLen - 1);
4131 tgl 249 ECB : }
250 :
4131 tgl 251 GIC 312 : PG_RETURN_VOID();
4131 tgl 252 ECB : }
253 : }
4131 tgl 254 GIC 63910 : else if (inSize > in->level)
4131 tgl 255 ECB : {
3226 tgl 256 GIC 63411 : nodeChar = *(unsigned char *) (inStr + in->level);
4131 tgl 257 ECB : }
258 : else
259 : {
3226 tgl 260 GIC 499 : nodeChar = -1;
4131 tgl 261 ECB : }
262 :
263 : /* Look up nodeChar in the node label array */
4131 tgl 264 GIC 105510 : if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
4131 tgl 265 ECB : {
266 : /*
267 : * Descend to existing node. (If in->allTheSame, the core code will
268 : * ignore our nodeN specification here, but that's OK. We still have
269 : * to provide the correct levelAdd and restDatum values, and those are
270 : * the same regardless of which node gets chosen by core.)
271 : */
272 : int levelAdd;
273 :
4131 tgl 274 GIC 104835 : out->resultType = spgMatchNode;
4131 tgl 275 CBC 104835 : out->result.matchNode.nodeN = i;
3226 276 104835 : levelAdd = commonLen;
277 104835 : if (nodeChar >= 0)
278 101261 : levelAdd++;
279 104835 : out->result.matchNode.levelAdd = levelAdd;
280 104835 : if (inSize - in->level - levelAdd > 0)
4131 281 101258 : out->result.matchNode.restDatum =
3226 282 101258 : formTextDatum(inStr + in->level + levelAdd,
283 101258 : inSize - in->level - levelAdd);
4131 tgl 284 ECB : else
4131 tgl 285 GIC 3577 : out->result.matchNode.restDatum =
4131 tgl 286 CBC 3577 : formTextDatum(NULL, 0);
4131 tgl 287 ECB : }
4131 tgl 288 GIC 675 : else if (in->allTheSame)
4131 tgl 289 ECB : {
290 : /*
291 : * Can't use AddNode action, so split the tuple. The upper tuple has
292 : * the same prefix as before and uses a dummy node label -2 for the
293 : * lower tuple. The lower tuple has no prefix and the same node
294 : * labels as the original tuple.
295 : *
296 : * Note: it might seem tempting to shorten the upper tuple's prefix,
297 : * if it has one, then use its last byte as label for the lower tuple.
298 : * But that doesn't win since we know the incoming value matches the
299 : * whole prefix: we'd just end up splitting the lower tuple again.
300 : */
4131 tgl 301 GIC 3 : out->resultType = spgSplitTuple;
4131 tgl 302 CBC 3 : out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
303 3 : out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
2420 304 3 : out->result.splitTuple.prefixNNodes = 1;
305 3 : out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
306 3 : out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
307 3 : out->result.splitTuple.childNodeN = 0;
4131 308 3 : out->result.splitTuple.postfixHasPrefix = false;
4131 tgl 309 ECB : }
310 : else
311 : {
312 : /* Add a node for the not-previously-seen nodeChar value */
4131 tgl 313 GIC 672 : out->resultType = spgAddNode;
3226 tgl 314 CBC 672 : out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
4131 315 672 : out->result.addNode.nodeN = i;
4131 tgl 316 ECB : }
317 :
4131 tgl 318 GIC 105510 : PG_RETURN_VOID();
4131 tgl 319 ECB : }
320 :
321 : /* qsort comparator to sort spgNodePtr structs by "c" */
322 : static int
4131 tgl 323 GIC 57875 : cmpNodePtr(const void *a, const void *b)
4131 tgl 324 ECB : {
4131 tgl 325 GIC 57875 : const spgNodePtr *aa = (const spgNodePtr *) a;
4131 tgl 326 CBC 57875 : const spgNodePtr *bb = (const spgNodePtr *) b;
4131 tgl 327 ECB :
3226 tgl 328 GIC 57875 : return aa->c - bb->c;
4131 tgl 329 ECB : }
330 :
331 : Datum
4131 tgl 332 GIC 262 : spg_text_picksplit(PG_FUNCTION_ARGS)
4131 tgl 333 ECB : {
4131 tgl 334 GIC 262 : spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
4131 tgl 335 CBC 262 : spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
336 262 : text *text0 = DatumGetTextPP(in->datums[0]);
4131 tgl 337 ECB : int i,
338 : commonLen;
339 : spgNodePtr *nodes;
340 :
341 : /* Identify longest common prefix, if any */
4131 tgl 342 GIC 262 : commonLen = VARSIZE_ANY_EXHDR(text0);
4131 tgl 343 CBC 6473 : for (i = 1; i < in->nTuples && commonLen > 0; i++)
4131 tgl 344 ECB : {
4131 tgl 345 GIC 6211 : text *texti = DatumGetTextPP(in->datums[i]);
4131 tgl 346 CBC 18633 : int tmp = commonPrefix(VARDATA_ANY(text0),
347 6211 : VARDATA_ANY(texti),
348 6211 : VARSIZE_ANY_EXHDR(text0),
349 6211 : VARSIZE_ANY_EXHDR(texti));
4131 tgl 350 ECB :
4131 tgl 351 GIC 6211 : if (tmp < commonLen)
4131 tgl 352 CBC 209 : commonLen = tmp;
4131 tgl 353 ECB : }
354 :
355 : /*
356 : * Limit the prefix length, if necessary, to ensure that the resulting
357 : * inner tuple will fit on a page.
358 : */
4131 tgl 359 GIC 262 : commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
4131 tgl 360 ECB :
361 : /* Set node prefix to be that string, if it's not empty */
4131 tgl 362 GIC 262 : if (commonLen == 0)
4131 tgl 363 ECB : {
4131 tgl 364 GIC 219 : out->hasPrefix = false;
4131 tgl 365 ECB : }
366 : else
367 : {
4131 tgl 368 GIC 43 : out->hasPrefix = true;
4131 tgl 369 CBC 43 : out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
4131 tgl 370 ECB : }
371 :
372 : /* Extract the node label (first non-common byte) from each value */
4131 tgl 373 GIC 262 : nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);
4131 tgl 374 ECB :
4131 tgl 375 GIC 25683 : for (i = 0; i < in->nTuples; i++)
4131 tgl 376 ECB : {
4131 tgl 377 GIC 25421 : text *texti = DatumGetTextPP(in->datums[i]);
4131 tgl 378 ECB :
4131 tgl 379 GIC 25421 : if (commonLen < VARSIZE_ANY_EXHDR(texti))
3226 tgl 380 CBC 22042 : nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
4131 tgl 381 ECB : else
3226 tgl 382 GIC 3379 : nodes[i].c = -1; /* use -1 if string is all common */
4131 tgl 383 CBC 25421 : nodes[i].i = i;
384 25421 : nodes[i].d = in->datums[i];
4131 tgl 385 ECB : }
386 :
387 : /*
388 : * Sort by label values so that we can group the values into nodes. This
389 : * also ensures that the nodes are ordered by label value, allowing the
390 : * use of binary search in searchChar.
391 : */
4131 tgl 392 GIC 262 : qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
4131 tgl 393 ECB :
394 : /* And emit results */
4131 tgl 395 GIC 262 : out->nNodes = 0;
4131 tgl 396 CBC 262 : out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
397 262 : out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
398 262 : out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
4131 tgl 399 ECB :
4131 tgl 400 GIC 25683 : for (i = 0; i < in->nTuples; i++)
4131 tgl 401 ECB : {
4131 tgl 402 GIC 25421 : text *texti = DatumGetTextPP(nodes[i].d);
4131 tgl 403 ECB : Datum leafD;
404 :
4131 tgl 405 GIC 25421 : if (i == 0 || nodes[i].c != nodes[i - 1].c)
4131 tgl 406 ECB : {
3226 tgl 407 GIC 1609 : out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
4131 tgl 408 CBC 1609 : out->nNodes++;
4131 tgl 409 ECB : }
410 :
4131 tgl 411 GIC 25421 : if (commonLen < VARSIZE_ANY_EXHDR(texti))
4131 tgl 412 CBC 22042 : leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
413 22042 : VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
4131 tgl 414 ECB : else
4131 tgl 415 GIC 3379 : leafD = formTextDatum(NULL, 0);
4131 tgl 416 ECB :
4131 tgl 417 GIC 25421 : out->leafTupleDatums[nodes[i].i] = leafD;
4131 tgl 418 CBC 25421 : out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
4131 tgl 419 ECB : }
420 :
4131 tgl 421 GIC 262 : PG_RETURN_VOID();
4131 tgl 422 ECB : }
423 :
424 : Datum
4131 tgl 425 GIC 1170 : spg_text_inner_consistent(PG_FUNCTION_ARGS)
4131 tgl 426 ECB : {
4131 tgl 427 GIC 1170 : spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
4131 tgl 428 CBC 1170 : spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
4047 429 1170 : bool collate_is_c = lc_collate_is_c(PG_GET_COLLATION());
2654 tgl 430 ECB : text *reconstructedValue;
431 : text *reconstrText;
432 : int maxReconstrLen;
4131 tgl 433 GIC 1170 : text *prefixText = NULL;
4131 tgl 434 CBC 1170 : int prefixSize = 0;
4047 tgl 435 ECB : int i;
436 :
437 : /*
438 : * Reconstruct values represented at this tuple, including parent data,
439 : * prefix of this tuple if any, and the node label if it's non-dummy.
440 : * in->level should be the length of the previously reconstructed value,
441 : * and the number of bytes added here is prefixSize or prefixSize + 1.
442 : *
443 : * Note: we assume that in->reconstructedValue isn't toasted and doesn't
444 : * have a short varlena header. This is okay because it must have been
445 : * created by a previous invocation of this routine, and we always emit
446 : * long-format reconstructed values.
447 : */
2654 tgl 448 GIC 1170 : reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
2654 tgl 449 CBC 1170 : Assert(reconstructedValue == NULL ? in->level == 0 :
2654 tgl 450 ECB : VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
451 :
4131 tgl 452 GIC 1170 : maxReconstrLen = in->level + 1;
4131 tgl 453 CBC 1170 : if (in->hasPrefix)
4131 tgl 454 ECB : {
4131 tgl 455 GIC 234 : prefixText = DatumGetTextPP(in->prefixDatum);
4131 tgl 456 CBC 234 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
457 234 : maxReconstrLen += prefixSize;
4131 tgl 458 ECB : }
459 :
4131 tgl 460 GIC 1170 : reconstrText = palloc(VARHDRSZ + maxReconstrLen);
4131 tgl 461 CBC 1170 : SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
4131 tgl 462 ECB :
4131 tgl 463 GIC 1170 : if (in->level)
4131 tgl 464 CBC 1080 : memcpy(VARDATA(reconstrText),
2654 465 1080 : VARDATA(reconstructedValue),
4131 466 1080 : in->level);
467 1170 : if (prefixSize)
468 234 : memcpy(((char *) VARDATA(reconstrText)) + in->level,
469 234 : VARDATA_ANY(prefixText),
4131 tgl 470 ECB : prefixSize);
471 : /* last byte of reconstrText will be filled in below */
472 :
473 : /*
474 : * Scan the child nodes. For each one, complete the reconstructed value
475 : * and see if it's consistent with the query. If so, emit an entry into
476 : * the output arrays.
477 : */
4131 tgl 478 GIC 1170 : out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
4131 tgl 479 CBC 1170 : out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
480 1170 : out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
481 1170 : out->nNodes = 0;
4131 tgl 482 ECB :
4131 tgl 483 GIC 11664 : for (i = 0; i < in->nNodes; i++)
4131 tgl 484 ECB : {
3226 tgl 485 GIC 10494 : int16 nodeChar = DatumGetInt16(in->nodeLabels[i]);
4131 tgl 486 ECB : int thisLen;
4047 tgl 487 GIC 10494 : bool res = true;
4047 tgl 488 ECB : int j;
489 :
490 : /* If nodeChar is a dummy value, don't include it in data */
3226 tgl 491 GIC 10494 : if (nodeChar <= 0)
4131 tgl 492 CBC 2382 : thisLen = maxReconstrLen - 1;
4131 tgl 493 ECB : else
494 : {
3226 tgl 495 GIC 8112 : ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
4131 tgl 496 CBC 8112 : thisLen = maxReconstrLen;
4131 tgl 497 ECB : }
498 :
4047 tgl 499 GIC 18942 : for (j = 0; j < in->nkeys; j++)
4131 tgl 500 ECB : {
4047 tgl 501 GIC 10494 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
4047 tgl 502 ECB : text *inText;
503 : int inSize;
504 : int r;
505 :
506 : /*
507 : * If it's a collation-aware operator, but the collation is C, we
508 : * can treat it as non-collation-aware. With non-C collation we
509 : * need to traverse whole tree :-( so there's no point in making
510 : * any check here. (Note also that our reconstructed value may
511 : * well end with a partial multibyte character, so that applying
512 : * any encoding-sensitive test to it would be risky anyhow.)
513 : */
1832 teodor 514 GIC 10494 : if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
4047 tgl 515 ECB : {
4047 tgl 516 GIC 7352 : if (collate_is_c)
1832 teodor 517 LBC 0 : strategy -= SPG_STRATEGY_ADDITION;
4047 tgl 518 EUB : else
4047 tgl 519 GIC 7352 : continue;
4047 tgl 520 ECB : }
521 :
4047 tgl 522 GIC 3142 : inText = DatumGetTextPP(in->scankeys[j].sk_argument);
4047 tgl 523 CBC 3142 : inSize = VARSIZE_ANY_EXHDR(inText);
4047 tgl 524 ECB :
4047 tgl 525 GIC 3142 : r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
4047 tgl 526 CBC 3142 : Min(inSize, thisLen));
4047 tgl 527 ECB :
4047 tgl 528 GIC 3142 : switch (strategy)
4047 tgl 529 ECB : {
4047 tgl 530 GIC 528 : case BTLessStrategyNumber:
4047 tgl 531 ECB : case BTLessEqualStrategyNumber:
4047 tgl 532 GIC 528 : if (r > 0)
4047 tgl 533 CBC 300 : res = false;
534 528 : break;
535 1798 : case BTEqualStrategyNumber:
536 1798 : if (r != 0 || inSize < thisLen)
537 1050 : res = false;
538 1798 : break;
539 408 : case BTGreaterEqualStrategyNumber:
4047 tgl 540 ECB : case BTGreaterStrategyNumber:
4047 tgl 541 GIC 408 : if (r < 0)
4047 tgl 542 CBC 312 : res = false;
543 408 : break;
1832 teodor 544 408 : case RTPrefixStrategyNumber:
545 408 : if (r != 0)
546 384 : res = false;
547 408 : break;
4047 tgl 548 LBC 0 : default:
4047 tgl 549 UBC 0 : elog(ERROR, "unrecognized strategy number: %d",
4047 tgl 550 EUB : in->scankeys[j].sk_strategy);
551 : break;
552 : }
553 :
4047 tgl 554 GIC 3142 : if (!res)
4047 tgl 555 CBC 2046 : break; /* no need to consider remaining conditions */
4131 tgl 556 ECB : }
557 :
4131 tgl 558 GIC 10494 : if (res)
4131 tgl 559 ECB : {
4131 tgl 560 GIC 8448 : out->nodeNumbers[out->nNodes] = i;
4131 tgl 561 CBC 8448 : out->levelAdds[out->nNodes] = thisLen - in->level;
562 8448 : SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
563 16896 : out->reconstructedValues[out->nNodes] =
564 8448 : datumCopy(PointerGetDatum(reconstrText), false, -1);
565 8448 : out->nNodes++;
4131 tgl 566 ECB : }
567 : }
568 :
4131 tgl 569 GIC 1170 : PG_RETURN_VOID();
4131 tgl 570 ECB : }
571 :
572 : Datum
4131 tgl 573 GIC 166410 : spg_text_leaf_consistent(PG_FUNCTION_ARGS)
4131 tgl 574 ECB : {
4131 tgl 575 GIC 166410 : spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
4131 tgl 576 CBC 166410 : spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
577 166410 : int level = in->level;
4131 tgl 578 ECB : text *leafValue,
4131 tgl 579 GIC 166410 : *reconstrValue = NULL;
4131 tgl 580 ECB : char *fullValue;
581 : int fullLen;
582 : bool res;
583 : int j;
584 :
585 : /* all tests are exact */
4131 tgl 586 GIC 166410 : out->recheck = false;
4131 tgl 587 ECB :
4131 tgl 588 GIC 166410 : leafValue = DatumGetTextPP(in->leafDatum);
4131 tgl 589 ECB :
590 : /* As above, in->reconstructedValue isn't toasted or short. */
4131 tgl 591 GIC 166410 : if (DatumGetPointer(in->reconstructedValue))
2219 noah 592 CBC 166398 : reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
4131 tgl 593 ECB :
2654 tgl 594 GIC 166410 : Assert(reconstrValue == NULL ? level == 0 :
4131 tgl 595 ECB : VARSIZE_ANY_EXHDR(reconstrValue) == level);
596 :
597 : /* Reconstruct the full string represented by this leaf tuple */
4131 tgl 598 GIC 166410 : fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
4131 tgl 599 CBC 166410 : if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
4131 tgl 600 ECB : {
4131 tgl 601 GIC 49752 : fullValue = VARDATA(reconstrValue);
4129 tgl 602 CBC 49752 : out->leafValue = PointerGetDatum(reconstrValue);
4131 tgl 603 ECB : }
604 : else
605 : {
3955 bruce 606 GIC 116658 : text *fullText = palloc(VARHDRSZ + fullLen);
4129 tgl 607 ECB :
4129 tgl 608 GIC 116658 : SET_VARSIZE(fullText, VARHDRSZ + fullLen);
4129 tgl 609 CBC 116658 : fullValue = VARDATA(fullText);
4131 610 116658 : if (level)
611 116646 : memcpy(fullValue, VARDATA(reconstrValue), level);
612 116658 : if (VARSIZE_ANY_EXHDR(leafValue) > 0)
613 116658 : memcpy(fullValue + level, VARDATA_ANY(leafValue),
614 116658 : VARSIZE_ANY_EXHDR(leafValue));
4129 615 116658 : out->leafValue = PointerGetDatum(fullText);
4131 tgl 616 ECB : }
617 :
618 : /* Perform the required comparison(s) */
4047 tgl 619 GIC 166410 : res = true;
4047 tgl 620 CBC 180183 : for (j = 0; j < in->nkeys; j++)
4131 tgl 621 ECB : {
4047 tgl 622 GIC 166410 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
4047 tgl 623 CBC 166410 : text *query = DatumGetTextPP(in->scankeys[j].sk_argument);
624 166410 : int queryLen = VARSIZE_ANY_EXHDR(query);
4047 tgl 625 ECB : int r;
626 :
1832 teodor 627 GIC 166410 : if (strategy == RTPrefixStrategyNumber)
1832 teodor 628 ECB : {
629 : /*
630 : * if level >= length of query then reconstrValue must begin with
631 : * query (prefix) string, so we don't need to check it again.
632 : */
1832 teodor 633 GIC 384 : res = (level >= queryLen) ||
1479 peter 634 CBC 192 : DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
1479 peter 635 ECB : PG_GET_COLLATION(),
636 : out->leafValue,
637 : PointerGetDatum(query)));
638 :
1819 tgl 639 GIC 192 : if (!res) /* no need to consider remaining conditions */
1832 teodor 640 CBC 168 : break;
1832 teodor 641 ECB :
1832 teodor 642 GIC 24 : continue;
1832 teodor 643 ECB : }
644 :
1832 teodor 645 GIC 166218 : if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
4047 tgl 646 ECB : {
647 : /* Collation-aware comparison */
1832 teodor 648 GIC 150024 : strategy -= SPG_STRATEGY_ADDITION;
4131 tgl 649 ECB :
650 : /* If asserts enabled, verify encoding of reconstructed string */
4047 tgl 651 GIC 150024 : Assert(pg_verifymbstr(fullValue, fullLen, false));
4131 tgl 652 ECB :
1819 tgl 653 GIC 150024 : r = varstr_cmp(fullValue, fullLen,
1819 tgl 654 CBC 150024 : VARDATA_ANY(query), queryLen,
4047 tgl 655 ECB : PG_GET_COLLATION());
656 : }
657 : else
658 : {
659 : /* Non-collation-aware comparison */
4047 tgl 660 GIC 16194 : r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
4131 tgl 661 ECB :
1819 tgl 662 GIC 16194 : if (r == 0)
1819 tgl 663 ECB : {
1819 tgl 664 GIC 12081 : if (queryLen > fullLen)
1819 tgl 665 CBC 6012 : r = -1;
666 6069 : else if (queryLen < fullLen)
1819 tgl 667 LBC 0 : r = 1;
1819 tgl 668 EUB : }
669 : }
670 :
4047 tgl 671 GIC 166218 : switch (strategy)
4047 tgl 672 ECB : {
4047 tgl 673 GIC 39144 : case BTLessStrategyNumber:
4047 tgl 674 CBC 39144 : res = (r < 0);
675 39144 : break;
676 39144 : case BTLessEqualStrategyNumber:
677 39144 : res = (r <= 0);
678 39144 : break;
679 12150 : case BTEqualStrategyNumber:
680 12150 : res = (r == 0);
681 12150 : break;
682 37890 : case BTGreaterEqualStrategyNumber:
683 37890 : res = (r >= 0);
684 37890 : break;
685 37890 : case BTGreaterStrategyNumber:
686 37890 : res = (r > 0);
687 37890 : break;
4047 tgl 688 LBC 0 : default:
4047 tgl 689 UBC 0 : elog(ERROR, "unrecognized strategy number: %d",
4047 tgl 690 EUB : in->scankeys[j].sk_strategy);
691 : res = false;
692 : break;
693 : }
694 :
4047 tgl 695 GIC 166218 : if (!res)
4047 tgl 696 CBC 152469 : break; /* no need to consider remaining conditions */
4131 tgl 697 ECB : }
698 :
4131 tgl 699 GIC 166410 : PG_RETURN_BOOL(res);
4131 tgl 700 ECB : }
|