TLA Line data Source code
1 : /*--------------------------------------------------------------------------
2 : *
3 : * spgist_name_ops.c
4 : * Test opclass for SP-GiST
5 : *
6 : * This indexes input values of type "name", but the index storage is "text",
7 : * with the same choices as made in the core SP-GiST text_ops opclass.
8 : * Much of the code is identical to src/backend/access/spgist/spgtextproc.c,
9 : * which see for a more detailed header comment.
10 : *
11 : * Unlike spgtextproc.c, we don't bother with collation-aware logic.
12 : *
13 : *
14 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
15 : * Portions Copyright (c) 1994, Regents of the University of California
16 : *
17 : * IDENTIFICATION
18 : * src/test/modules/spgist_name_ops/spgist_name_ops.c
19 : *
20 : * -------------------------------------------------------------------------
21 : */
22 : #include "postgres.h"
23 :
24 : #include "access/spgist.h"
25 : #include "catalog/pg_type.h"
26 : #include "utils/datum.h"
27 : #include "varatt.h"
28 :
29 GIC 1 : PG_MODULE_MAGIC;
30 ECB :
31 :
32 GIC 2 : PG_FUNCTION_INFO_V1(spgist_name_config);
33 ECB : Datum
34 GIC 7 : spgist_name_config(PG_FUNCTION_ARGS)
35 ECB : {
36 : /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
37 GIC 7 : spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
38 ECB :
39 GIC 7 : cfg->prefixType = TEXTOID;
40 CBC 7 : cfg->labelType = INT2OID;
41 7 : cfg->leafType = TEXTOID;
42 7 : cfg->canReturnData = true;
43 7 : cfg->longValuesOK = true; /* suffixing will shorten long values */
44 7 : PG_RETURN_VOID();
45 ECB : }
46 :
47 : /*
48 : * Form a text datum from the given not-necessarily-null-terminated string,
49 : * using short varlena header format if possible
50 : */
51 : static Datum
52 GIC 17245 : formTextDatum(const char *data, int datalen)
53 ECB : {
54 : char *p;
55 :
56 GIC 17245 : p = (char *) palloc(datalen + VARHDRSZ);
57 ECB :
58 GIC 17245 : if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
59 ECB : {
60 GIC 17245 : SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
61 CBC 17245 : if (datalen)
62 17202 : memcpy(p + VARHDRSZ_SHORT, data, datalen);
63 ECB : }
64 : else
65 : {
66 UIC 0 : SET_VARSIZE(p, datalen + VARHDRSZ);
67 UBC 0 : memcpy(p + VARHDRSZ, data, datalen);
68 EUB : }
69 :
70 GIC 17245 : return PointerGetDatum(p);
71 ECB : }
72 :
73 : /*
74 : * Find the length of the common prefix of a and b
75 : */
76 : static int
77 GIC 918 : commonPrefix(const char *a, const char *b, int lena, int lenb)
78 ECB : {
79 GIC 918 : int i = 0;
80 ECB :
81 GIC 2028 : while (i < lena && i < lenb && *a == *b)
82 ECB : {
83 GIC 1110 : a++;
84 CBC 1110 : b++;
85 1110 : i++;
86 ECB : }
87 :
88 GIC 918 : return i;
89 ECB : }
90 :
91 : /*
92 : * Binary search an array of int16 datums for a match to c
93 : *
94 : * On success, *i gets the match location; on failure, it gets where to insert
95 : */
96 : static bool
97 GIC 10728 : searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
98 ECB : {
99 GIC 10728 : int StopLow = 0,
100 CBC 10728 : StopHigh = nNodes;
101 ECB :
102 GIC 34623 : while (StopLow < StopHigh)
103 ECB : {
104 GIC 34512 : int StopMiddle = (StopLow + StopHigh) >> 1;
105 CBC 34512 : int16 middle = DatumGetInt16(nodeLabels[StopMiddle]);
106 ECB :
107 GIC 34512 : if (c < middle)
108 CBC 13841 : StopHigh = StopMiddle;
109 20671 : else if (c > middle)
110 10054 : StopLow = StopMiddle + 1;
111 ECB : else
112 : {
113 GIC 10617 : *i = StopMiddle;
114 CBC 10617 : return true;
115 ECB : }
116 : }
117 :
118 GIC 111 : *i = StopHigh;
119 CBC 111 : return false;
120 ECB : }
121 :
122 GIC 2 : PG_FUNCTION_INFO_V1(spgist_name_choose);
123 ECB : Datum
124 GIC 10734 : spgist_name_choose(PG_FUNCTION_ARGS)
125 ECB : {
126 GIC 10734 : spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
127 CBC 10734 : spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
128 10734 : Name inName = DatumGetName(in->datum);
129 10734 : char *inStr = NameStr(*inName);
130 10734 : int inSize = strlen(inStr);
131 10734 : char *prefixStr = NULL;
132 10734 : int prefixSize = 0;
133 10734 : int commonLen = 0;
134 10734 : int16 nodeChar = 0;
135 10734 : int i = 0;
136 ECB :
137 : /* Check for prefix match, set nodeChar to first byte after prefix */
138 GIC 10734 : if (in->hasPrefix)
139 ECB : {
140 GIC 918 : text *prefixText = DatumGetTextPP(in->prefixDatum);
141 ECB :
142 GIC 918 : prefixStr = VARDATA_ANY(prefixText);
143 CBC 918 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
144 ECB :
145 GIC 918 : commonLen = commonPrefix(inStr + in->level,
146 ECB : prefixStr,
147 GIC 918 : inSize - in->level,
148 ECB : prefixSize);
149 :
150 GIC 918 : if (commonLen == prefixSize)
151 ECB : {
152 GIC 912 : if (inSize - in->level > commonLen)
153 CBC 910 : nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
154 ECB : else
155 GIC 2 : nodeChar = -1;
156 ECB : }
157 : else
158 : {
159 : /* Must split tuple because incoming value doesn't match prefix */
160 GIC 6 : out->resultType = spgSplitTuple;
161 ECB :
162 GIC 6 : if (commonLen == 0)
163 ECB : {
164 GIC 2 : out->result.splitTuple.prefixHasPrefix = false;
165 ECB : }
166 : else
167 : {
168 GIC 4 : out->result.splitTuple.prefixHasPrefix = true;
169 CBC 4 : out->result.splitTuple.prefixPrefixDatum =
170 4 : formTextDatum(prefixStr, commonLen);
171 ECB : }
172 GIC 6 : out->result.splitTuple.prefixNNodes = 1;
173 CBC 6 : out->result.splitTuple.prefixNodeLabels =
174 6 : (Datum *) palloc(sizeof(Datum));
175 12 : out->result.splitTuple.prefixNodeLabels[0] =
176 6 : Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
177 ECB :
178 GIC 6 : out->result.splitTuple.childNodeN = 0;
179 ECB :
180 GIC 6 : if (prefixSize - commonLen == 1)
181 ECB : {
182 GIC 4 : out->result.splitTuple.postfixHasPrefix = false;
183 ECB : }
184 : else
185 : {
186 GIC 2 : out->result.splitTuple.postfixHasPrefix = true;
187 CBC 2 : out->result.splitTuple.postfixPrefixDatum =
188 2 : formTextDatum(prefixStr + commonLen + 1,
189 2 : prefixSize - commonLen - 1);
190 ECB : }
191 :
192 GIC 6 : PG_RETURN_VOID();
193 ECB : }
194 : }
195 GIC 9816 : else if (inSize > in->level)
196 ECB : {
197 GIC 9796 : nodeChar = *(unsigned char *) (inStr + in->level);
198 ECB : }
199 : else
200 : {
201 GIC 20 : nodeChar = -1;
202 ECB : }
203 :
204 : /* Look up nodeChar in the node label array */
205 GIC 10728 : if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
206 ECB : {
207 : /*
208 : * Descend to existing node. (If in->allTheSame, the core code will
209 : * ignore our nodeN specification here, but that's OK. We still have
210 : * to provide the correct levelAdd and restDatum values, and those are
211 : * the same regardless of which node gets chosen by core.)
212 : */
213 : int levelAdd;
214 :
215 GIC 10617 : out->resultType = spgMatchNode;
216 CBC 10617 : out->result.matchNode.nodeN = i;
217 10617 : levelAdd = commonLen;
218 10617 : if (nodeChar >= 0)
219 10595 : levelAdd++;
220 10617 : out->result.matchNode.levelAdd = levelAdd;
221 10617 : if (inSize - in->level - levelAdd > 0)
222 10574 : out->result.matchNode.restDatum =
223 10574 : formTextDatum(inStr + in->level + levelAdd,
224 10574 : inSize - in->level - levelAdd);
225 ECB : else
226 GIC 43 : out->result.matchNode.restDatum =
227 CBC 43 : formTextDatum(NULL, 0);
228 ECB : }
229 GIC 111 : else if (in->allTheSame)
230 ECB : {
231 : /*
232 : * Can't use AddNode action, so split the tuple. The upper tuple has
233 : * the same prefix as before and uses a dummy node label -2 for the
234 : * lower tuple. The lower tuple has no prefix and the same node
235 : * labels as the original tuple.
236 : *
237 : * Note: it might seem tempting to shorten the upper tuple's prefix,
238 : * if it has one, then use its last byte as label for the lower tuple.
239 : * But that doesn't win since we know the incoming value matches the
240 : * whole prefix: we'd just end up splitting the lower tuple again.
241 : */
242 UIC 0 : out->resultType = spgSplitTuple;
243 UBC 0 : out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
244 0 : out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
245 0 : out->result.splitTuple.prefixNNodes = 1;
246 0 : out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
247 0 : out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
248 0 : out->result.splitTuple.childNodeN = 0;
249 0 : out->result.splitTuple.postfixHasPrefix = false;
250 EUB : }
251 : else
252 : {
253 : /* Add a node for the not-previously-seen nodeChar value */
254 GIC 111 : out->resultType = spgAddNode;
255 CBC 111 : out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
256 111 : out->result.addNode.nodeN = i;
257 ECB : }
258 :
259 GIC 10728 : PG_RETURN_VOID();
260 ECB : }
261 :
262 : /* The picksplit function is identical to the core opclass, so just use that */
263 :
264 GIC 2 : PG_FUNCTION_INFO_V1(spgist_name_inner_consistent);
265 ECB : Datum
266 GIC 4 : spgist_name_inner_consistent(PG_FUNCTION_ARGS)
267 ECB : {
268 GIC 4 : spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
269 CBC 4 : spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
270 ECB : text *reconstructedValue;
271 : text *reconstrText;
272 : int maxReconstrLen;
273 GIC 4 : text *prefixText = NULL;
274 CBC 4 : int prefixSize = 0;
275 ECB : int i;
276 :
277 : /*
278 : * Reconstruct values represented at this tuple, including parent data,
279 : * prefix of this tuple if any, and the node label if it's non-dummy.
280 : * in->level should be the length of the previously reconstructed value,
281 : * and the number of bytes added here is prefixSize or prefixSize + 1.
282 : *
283 : * Recall that reconstructedValues are assumed to be the same type as leaf
284 : * datums, so we must use "text" not "name" for them.
285 : *
286 : * Note: we assume that in->reconstructedValue isn't toasted and doesn't
287 : * have a short varlena header. This is okay because it must have been
288 : * created by a previous invocation of this routine, and we always emit
289 : * long-format reconstructed values.
290 : */
291 GIC 4 : reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
292 CBC 4 : Assert(reconstructedValue == NULL ? in->level == 0 :
293 ECB : VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
294 :
295 GIC 4 : maxReconstrLen = in->level + 1;
296 CBC 4 : if (in->hasPrefix)
297 ECB : {
298 UIC 0 : prefixText = DatumGetTextPP(in->prefixDatum);
299 UBC 0 : prefixSize = VARSIZE_ANY_EXHDR(prefixText);
300 0 : maxReconstrLen += prefixSize;
301 EUB : }
302 :
303 GIC 4 : reconstrText = palloc(VARHDRSZ + maxReconstrLen);
304 CBC 4 : SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
305 ECB :
306 GIC 4 : if (in->level)
307 CBC 2 : memcpy(VARDATA(reconstrText),
308 2 : VARDATA(reconstructedValue),
309 2 : in->level);
310 4 : if (prefixSize)
311 LBC 0 : memcpy(((char *) VARDATA(reconstrText)) + in->level,
312 UBC 0 : VARDATA_ANY(prefixText),
313 EUB : prefixSize);
314 : /* last byte of reconstrText will be filled in below */
315 :
316 : /*
317 : * Scan the child nodes. For each one, complete the reconstructed value
318 : * and see if it's consistent with the query. If so, emit an entry into
319 : * the output arrays.
320 : */
321 GIC 4 : out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
322 CBC 4 : out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
323 4 : out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
324 4 : out->nNodes = 0;
325 ECB :
326 GIC 70 : for (i = 0; i < in->nNodes; i++)
327 ECB : {
328 GIC 66 : int16 nodeChar = DatumGetInt16(in->nodeLabels[i]);
329 ECB : int thisLen;
330 GIC 66 : bool res = true;
331 ECB : int j;
332 :
333 : /* If nodeChar is a dummy value, don't include it in data */
334 GIC 66 : if (nodeChar <= 0)
335 LBC 0 : thisLen = maxReconstrLen - 1;
336 EUB : else
337 : {
338 GIC 66 : ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
339 CBC 66 : thisLen = maxReconstrLen;
340 ECB : }
341 :
342 GIC 128 : for (j = 0; j < in->nkeys; j++)
343 ECB : {
344 GIC 124 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
345 ECB : Name inName;
346 : char *inStr;
347 : int inSize;
348 : int r;
349 :
350 GIC 124 : inName = DatumGetName(in->scankeys[j].sk_argument);
351 CBC 124 : inStr = NameStr(*inName);
352 124 : inSize = strlen(inStr);
353 ECB :
354 GIC 124 : r = memcmp(VARDATA(reconstrText), inStr,
355 CBC 124 : Min(inSize, thisLen));
356 ECB :
357 GIC 124 : switch (strategy)
358 ECB : {
359 GIC 58 : case BTLessStrategyNumber:
360 ECB : case BTLessEqualStrategyNumber:
361 GIC 58 : if (r > 0)
362 CBC 54 : res = false;
363 58 : break;
364 LBC 0 : case BTEqualStrategyNumber:
365 UBC 0 : if (r != 0 || inSize < thisLen)
366 0 : res = false;
367 0 : break;
368 GBC 66 : case BTGreaterEqualStrategyNumber:
369 ECB : case BTGreaterStrategyNumber:
370 GIC 66 : if (r < 0)
371 CBC 8 : res = false;
372 66 : break;
373 LBC 0 : default:
374 UBC 0 : elog(ERROR, "unrecognized strategy number: %d",
375 EUB : in->scankeys[j].sk_strategy);
376 : break;
377 : }
378 :
379 GIC 124 : if (!res)
380 CBC 62 : break; /* no need to consider remaining conditions */
381 ECB : }
382 :
383 GIC 66 : if (res)
384 ECB : {
385 GIC 4 : out->nodeNumbers[out->nNodes] = i;
386 CBC 4 : out->levelAdds[out->nNodes] = thisLen - in->level;
387 4 : SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
388 8 : out->reconstructedValues[out->nNodes] =
389 4 : datumCopy(PointerGetDatum(reconstrText), false, -1);
390 4 : out->nNodes++;
391 ECB : }
392 : }
393 :
394 GIC 4 : PG_RETURN_VOID();
395 ECB : }
396 :
397 GIC 2 : PG_FUNCTION_INFO_V1(spgist_name_leaf_consistent);
398 ECB : Datum
399 GIC 118 : spgist_name_leaf_consistent(PG_FUNCTION_ARGS)
400 ECB : {
401 GIC 118 : spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
402 CBC 118 : spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
403 118 : int level = in->level;
404 ECB : text *leafValue,
405 GIC 118 : *reconstrValue = NULL;
406 ECB : char *fullValue;
407 : int fullLen;
408 : bool res;
409 : int j;
410 :
411 : /* all tests are exact */
412 GIC 118 : out->recheck = false;
413 ECB :
414 GIC 118 : leafValue = DatumGetTextPP(in->leafDatum);
415 ECB :
416 : /* As above, in->reconstructedValue isn't toasted or short. */
417 GIC 118 : if (DatumGetPointer(in->reconstructedValue))
418 CBC 118 : reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
419 ECB :
420 GIC 118 : Assert(reconstrValue == NULL ? level == 0 :
421 ECB : VARSIZE_ANY_EXHDR(reconstrValue) == level);
422 :
423 : /* Reconstruct the Name represented by this leaf tuple */
424 GIC 118 : fullValue = palloc0(NAMEDATALEN);
425 CBC 118 : fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
426 118 : Assert(fullLen < NAMEDATALEN);
427 118 : if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
428 ECB : {
429 UIC 0 : memcpy(fullValue, VARDATA(reconstrValue),
430 UBC 0 : VARSIZE_ANY_EXHDR(reconstrValue));
431 EUB : }
432 : else
433 : {
434 GIC 118 : if (level)
435 CBC 118 : memcpy(fullValue, VARDATA(reconstrValue), level);
436 118 : if (VARSIZE_ANY_EXHDR(leafValue) > 0)
437 118 : memcpy(fullValue + level, VARDATA_ANY(leafValue),
438 118 : VARSIZE_ANY_EXHDR(leafValue));
439 ECB : }
440 GIC 118 : out->leafValue = PointerGetDatum(fullValue);
441 ECB :
442 : /* Perform the required comparison(s) */
443 GIC 118 : res = true;
444 CBC 252 : for (j = 0; j < in->nkeys; j++)
445 ECB : {
446 GIC 226 : StrategyNumber strategy = in->scankeys[j].sk_strategy;
447 CBC 226 : Name queryName = DatumGetName(in->scankeys[j].sk_argument);
448 226 : char *queryStr = NameStr(*queryName);
449 226 : int queryLen = strlen(queryStr);
450 ECB : int r;
451 :
452 : /* Non-collation-aware comparison */
453 GIC 226 : r = memcmp(fullValue, queryStr, Min(queryLen, fullLen));
454 ECB :
455 GIC 226 : if (r == 0)
456 ECB : {
457 GIC 26 : if (queryLen > fullLen)
458 LBC 0 : r = -1;
459 GBC 26 : else if (queryLen < fullLen)
460 CBC 26 : r = 1;
461 ECB : }
462 :
463 GIC 226 : switch (strategy)
464 ECB : {
465 GIC 108 : case BTLessStrategyNumber:
466 CBC 108 : res = (r < 0);
467 108 : break;
468 LBC 0 : case BTLessEqualStrategyNumber:
469 UBC 0 : res = (r <= 0);
470 0 : break;
471 0 : case BTEqualStrategyNumber:
472 0 : res = (r == 0);
473 0 : break;
474 0 : case BTGreaterEqualStrategyNumber:
475 0 : res = (r >= 0);
476 0 : break;
477 GBC 118 : case BTGreaterStrategyNumber:
478 CBC 118 : res = (r > 0);
479 118 : break;
480 LBC 0 : default:
481 UBC 0 : elog(ERROR, "unrecognized strategy number: %d",
482 EUB : in->scankeys[j].sk_strategy);
483 : res = false;
484 : break;
485 : }
486 :
487 GIC 226 : if (!res)
488 CBC 92 : break; /* no need to consider remaining conditions */
489 ECB : }
490 :
491 GIC 118 : PG_RETURN_BOOL(res);
492 ECB : }
493 :
494 GIC 2 : PG_FUNCTION_INFO_V1(spgist_name_compress);
495 ECB : Datum
496 GIC 6622 : spgist_name_compress(PG_FUNCTION_ARGS)
497 ECB : {
498 GIC 6622 : Name inName = PG_GETARG_NAME(0);
499 CBC 6622 : char *inStr = NameStr(*inName);
500 ECB :
501 GIC 6622 : PG_RETURN_DATUM(formTextDatum(inStr, strlen(inStr)));
502 ECB : }
|