LCOV - differential code coverage report
Current view: top level - src/backend/access/spgist - spgtextproc.c (source / functions) Coverage Total Hit LBC UIC UBC GIC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 97.3 % 291 283 4 1 3 134 149 5 130
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 9 9 9 9
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 97.3 % 291 283 4 1 3 134 149 5 130
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 50.0 % 18 9 9 9

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * spgtextproc.c
                                  4                 :  *    implementation of radix tree (compressed trie) over text
                                  5                 :  *
                                  6                 :  * In a text_ops SPGiST index, inner tuples can have a prefix which is the
                                  7                 :  * common prefix of all strings indexed under that tuple.  The node labels
                                  8                 :  * represent the next byte of the string(s) after the prefix.  Assuming we
                                  9                 :  * always use the longest possible prefix, we will get more than one node
                                 10                 :  * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
                                 11                 :  *
                                 12                 :  * To reconstruct the indexed string for any index entry, concatenate the
                                 13                 :  * inner-tuple prefixes and node labels starting at the root and working
                                 14                 :  * down to the leaf entry, then append the datum in the leaf entry.
                                 15                 :  * (While descending the tree, "level" is the number of bytes reconstructed
                                 16                 :  * so far.)
                                 17                 :  *
                                 18                 :  * However, there are two special cases for node labels: -1 indicates that
                                 19                 :  * there are no more bytes after the prefix-so-far, and -2 indicates that we
                                 20                 :  * had to split an existing allTheSame tuple (in such a case we have to create
                                 21                 :  * a node label that doesn't correspond to any string byte).  In either case,
                                 22                 :  * the node label does not contribute anything to the reconstructed string.
                                 23                 :  *
                                 24                 :  * Previously, we used a node label of zero for both special cases, but
                                 25                 :  * this was problematic because one can't tell whether a string ending at
                                 26                 :  * the current level can be pushed down into such a child node.  For
                                 27                 :  * backwards compatibility, we still support such node labels for reading;
                                 28                 :  * but no new entries will ever be pushed down into a zero-labeled child.
                                 29                 :  * No new entries ever get pushed into a -2-labeled child, either.
                                 30                 :  *
                                 31                 :  *
                                 32                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 33                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 34                 :  *
                                 35                 :  * IDENTIFICATION
                                 36                 :  *          src/backend/access/spgist/spgtextproc.c
                                 37                 :  *
                                 38                 :  *-------------------------------------------------------------------------
                                 39                 :  */
                                 40                 : #include "postgres.h"
                                 41                 : 
                                 42                 : #include "access/spgist.h"
                                 43                 : #include "catalog/pg_type.h"
                                 44                 : #include "mb/pg_wchar.h"
                                 45                 : #include "utils/builtins.h"
                                 46                 : #include "utils/datum.h"
                                 47                 : #include "utils/pg_locale.h"
                                 48                 : #include "utils/varlena.h"
                                 49                 : #include "varatt.h"
                                 50                 : 
                                 51                 : 
                                 52                 : /*
                                 53                 :  * In the worst case, an inner tuple in a text radix tree could have as many
                                 54                 :  * as 258 nodes (one for each possible byte value, plus the two special
                                 55                 :  * cases).  Each node can take 16 bytes on MAXALIGN=8 machines.  The inner
                                 56                 :  * tuple must fit on an index page of size BLCKSZ.  Rather than assuming we
                                 57                 :  * know the exact amount of overhead imposed by page headers, tuple headers,
                                 58                 :  * etc, we leave 100 bytes for that (the actual overhead should be no more
                                 59                 :  * than 56 bytes at this writing, so there is slop in this number).
                                 60                 :  * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
                                 61                 :  * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
                                 62                 :  * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
                                 63                 :  * tuple size exceeds maximum" if there are too many distinct next-byte values
                                 64                 :  * at a given place in the tree.  Since use of nonstandard block sizes appears
                                 65                 :  * to be negligible in the field, we just live with that fact for now,
                                 66                 :  * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
                                 67                 :  * than default.
                                 68                 :  */
                                 69                 : #define SPGIST_MAX_PREFIX_LENGTH    Max((int) (BLCKSZ - 258 * 16 - 100), 32)
                                 70                 : 
                                 71                 : /*
                                 72                 :  * Strategy for collation aware operator on text is equal to btree strategy
                                 73                 :  * plus value of 10.
                                 74                 :  *
                                 75                 :  * Current collation aware strategies and their corresponding btree strategies:
                                 76                 :  * 11 BTLessStrategyNumber
                                 77                 :  * 12 BTLessEqualStrategyNumber
                                 78                 :  * 14 BTGreaterEqualStrategyNumber
                                 79                 :  * 15 BTGreaterStrategyNumber
                                 80                 :  */
                                 81                 : #define SPG_STRATEGY_ADDITION   (10)
                                 82                 : #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
                                 83                 :                                          && (s) != RTPrefixStrategyNumber)
                                 84                 : 
                                 85                 : /* Struct for sorting values in picksplit */
                                 86                 : typedef struct spgNodePtr
                                 87                 : {
                                 88                 :     Datum       d;
                                 89                 :     int         i;
                                 90                 :     int16       c;
                                 91                 : } spgNodePtr;
                                 92                 : 
                                 93                 : 
                                 94                 : Datum
 4131 tgl                        95 GIC          32 : spg_text_config(PG_FUNCTION_ARGS)
 4131 tgl                        96 ECB             : {
                                 97                 :     /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
 4131 tgl                        98 GIC          32 :     spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
 4131 tgl                        99 ECB             : 
 4131 tgl                       100 GIC          32 :     cfg->prefixType = TEXTOID;
 3226 tgl                       101 CBC          32 :     cfg->labelType = INT2OID;
 4129                           102              32 :     cfg->canReturnData = true;
 4131                           103              32 :     cfg->longValuesOK = true;    /* suffixing will shorten long values */
                                104              32 :     PG_RETURN_VOID();
 4131 tgl                       105 ECB             : }
                                106                 : 
                                107                 : /*
                                108                 :  * Form a text datum from the given not-necessarily-null-terminated string,
                                109                 :  * using short varlena header format if possible
                                110                 :  */
                                111                 : static Datum
 4131 tgl                       112 GIC      130608 : formTextDatum(const char *data, int datalen)
 4131 tgl                       113 ECB             : {
                                114                 :     char       *p;
                                115                 : 
 4131 tgl                       116 GIC      130608 :     p = (char *) palloc(datalen + VARHDRSZ);
 4131 tgl                       117 ECB             : 
 4131 tgl                       118 GIC      130608 :     if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
 4131 tgl                       119 ECB             :     {
 4131 tgl                       120 GIC      130608 :         SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
 4131 tgl                       121 CBC      130608 :         if (datalen)
                                122          122789 :             memcpy(p + VARHDRSZ_SHORT, data, datalen);
 4131 tgl                       123 ECB             :     }
                                124                 :     else
                                125                 :     {
 4131 tgl                       126 UIC           0 :         SET_VARSIZE(p, datalen + VARHDRSZ);
 4131 tgl                       127 UBC           0 :         memcpy(p + VARHDRSZ, data, datalen);
 4131 tgl                       128 EUB             :     }
                                129                 : 
 4131 tgl                       130 GIC      130608 :     return PointerGetDatum(p);
 4131 tgl                       131 ECB             : }
                                132                 : 
                                133                 : /*
                                134                 :  * Find the length of the common prefix of a and b
                                135                 :  */
                                136                 : static int
 4131 tgl                       137 GIC       48123 : commonPrefix(const char *a, const char *b, int lena, int lenb)
 4131 tgl                       138 ECB             : {
 4131 tgl                       139 GIC       48123 :     int         i = 0;
 4131 tgl                       140 ECB             : 
 4131 tgl                       141 GIC     3379944 :     while (i < lena && i < lenb && *a == *b)
 4131 tgl                       142 ECB             :     {
 4131 tgl                       143 GIC     3331821 :         a++;
 4131 tgl                       144 CBC     3331821 :         b++;
                                145         3331821 :         i++;
 4131 tgl                       146 ECB             :     }
                                147                 : 
 4131 tgl                       148 GIC       48123 :     return i;
 4131 tgl                       149 ECB             : }
                                150                 : 
                                151                 : /*
                                152                 :  * Binary search an array of int16 datums for a match to c
                                153                 :  *
                                154                 :  * On success, *i gets the match location; on failure, it gets where to insert
                                155                 :  */
                                156                 : static bool
 3226 tgl                       157 GIC      105510 : searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
 4131 tgl                       158 ECB             : {
 4131 tgl                       159 GIC      105510 :     int         StopLow = 0,
 4131 tgl                       160 CBC      105510 :                 StopHigh = nNodes;
 4131 tgl                       161 ECB             : 
 4131 tgl                       162 GIC      290633 :     while (StopLow < StopHigh)
 4131 tgl                       163 ECB             :     {
 4131 tgl                       164 GIC      289958 :         int         StopMiddle = (StopLow + StopHigh) >> 1;
 3226 tgl                       165 CBC      289958 :         int16       middle = DatumGetInt16(nodeLabels[StopMiddle]);
 4131 tgl                       166 ECB             : 
 4131 tgl                       167 GIC      289958 :         if (c < middle)
 4131 tgl                       168 CBC       92221 :             StopHigh = StopMiddle;
                                169          197737 :         else if (c > middle)
                                170           92902 :             StopLow = StopMiddle + 1;
 4131 tgl                       171 ECB             :         else
                                172                 :         {
 4131 tgl                       173 GIC      104835 :             *i = StopMiddle;
 4131 tgl                       174 CBC      104835 :             return true;
 4131 tgl                       175 ECB             :         }
                                176                 :     }
                                177                 : 
 4131 tgl                       178 GIC         675 :     *i = StopHigh;
 4131 tgl                       179 CBC         675 :     return false;
 4131 tgl                       180 ECB             : }
                                181                 : 
                                182                 : Datum
 4131 tgl                       183 GIC      105822 : spg_text_choose(PG_FUNCTION_ARGS)
 4131 tgl                       184 ECB             : {
 4131 tgl                       185 GIC      105822 :     spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
 4131 tgl                       186 CBC      105822 :     spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
                                187          105822 :     text       *inText = DatumGetTextPP(in->datum);
                                188          105822 :     char       *inStr = VARDATA_ANY(inText);
                                189          105822 :     int         inSize = VARSIZE_ANY_EXHDR(inText);
 3226                           190          105822 :     char       *prefixStr = NULL;
                                191          105822 :     int         prefixSize = 0;
 4131                           192          105822 :     int         commonLen = 0;
 3226                           193          105822 :     int16       nodeChar = 0;
                                194          105822 :     int         i = 0;
 4131 tgl                       195 ECB             : 
                                196                 :     /* Check for prefix match, set nodeChar to first byte after prefix */
 4131 tgl                       197 GIC      105822 :     if (in->hasPrefix)
 4131 tgl                       198 ECB             :     {
 4131 tgl                       199 GIC       41912 :         text       *prefixText = DatumGetTextPP(in->prefixDatum);
 3226 tgl                       200 ECB             : 
 3226 tgl                       201 GIC       41912 :         prefixStr = VARDATA_ANY(prefixText);
 3226 tgl                       202 CBC       41912 :         prefixSize = VARSIZE_ANY_EXHDR(prefixText);
 4131 tgl                       203 ECB             : 
 4131 tgl                       204 GIC       41912 :         commonLen = commonPrefix(inStr + in->level,
 4131 tgl                       205 ECB             :                                  prefixStr,
 4131 tgl                       206 GIC       41912 :                                  inSize - in->level,
 4131 tgl                       207 ECB             :                                  prefixSize);
                                208                 : 
 4131 tgl                       209 GIC       41912 :         if (commonLen == prefixSize)
 4131 tgl                       210 ECB             :         {
 4131 tgl                       211 GIC       41600 :             if (inSize - in->level > commonLen)
 3226 tgl                       212 CBC       38522 :                 nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
 4131 tgl                       213 ECB             :             else
 3226 tgl                       214 GIC        3078 :                 nodeChar = -1;
 4131 tgl                       215 ECB             :         }
                                216                 :         else
                                217                 :         {
                                218                 :             /* Must split tuple because incoming value doesn't match prefix */
 4131 tgl                       219 GIC         312 :             out->resultType = spgSplitTuple;
 4131 tgl                       220 ECB             : 
 4131 tgl                       221 GIC         312 :             if (commonLen == 0)
 4131 tgl                       222 ECB             :             {
 4131 tgl                       223 GIC           9 :                 out->result.splitTuple.prefixHasPrefix = false;
 4131 tgl                       224 ECB             :             }
                                225                 :             else
                                226                 :             {
 4131 tgl                       227 GIC         303 :                 out->result.splitTuple.prefixHasPrefix = true;
 4131 tgl                       228 CBC         303 :                 out->result.splitTuple.prefixPrefixDatum =
                                229             303 :                     formTextDatum(prefixStr, commonLen);
 4131 tgl                       230 ECB             :             }
 2420 tgl                       231 GIC         312 :             out->result.splitTuple.prefixNNodes = 1;
 2420 tgl                       232 CBC         312 :             out->result.splitTuple.prefixNodeLabels =
                                233             312 :                 (Datum *) palloc(sizeof(Datum));
                                234             624 :             out->result.splitTuple.prefixNodeLabels[0] =
 3226                           235             312 :                 Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
 4131 tgl                       236 ECB             : 
 2420 tgl                       237 GIC         312 :             out->result.splitTuple.childNodeN = 0;
 2420 tgl                       238 ECB             : 
 4131 tgl                       239 GIC         312 :             if (prefixSize - commonLen == 1)
 4131 tgl                       240 ECB             :             {
 4131 tgl                       241 GIC         306 :                 out->result.splitTuple.postfixHasPrefix = false;
 4131 tgl                       242 ECB             :             }
                                243                 :             else
                                244                 :             {
 4131 tgl                       245 GIC           6 :                 out->result.splitTuple.postfixHasPrefix = true;
 4131 tgl                       246 CBC           6 :                 out->result.splitTuple.postfixPrefixDatum =
                                247               6 :                     formTextDatum(prefixStr + commonLen + 1,
                                248               6 :                                   prefixSize - commonLen - 1);
 4131 tgl                       249 ECB             :             }
                                250                 : 
 4131 tgl                       251 GIC         312 :             PG_RETURN_VOID();
 4131 tgl                       252 ECB             :         }
                                253                 :     }
 4131 tgl                       254 GIC       63910 :     else if (inSize > in->level)
 4131 tgl                       255 ECB             :     {
 3226 tgl                       256 GIC       63411 :         nodeChar = *(unsigned char *) (inStr + in->level);
 4131 tgl                       257 ECB             :     }
                                258                 :     else
                                259                 :     {
 3226 tgl                       260 GIC         499 :         nodeChar = -1;
 4131 tgl                       261 ECB             :     }
                                262                 : 
                                263                 :     /* Look up nodeChar in the node label array */
 4131 tgl                       264 GIC      105510 :     if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
 4131 tgl                       265 ECB             :     {
                                266                 :         /*
                                267                 :          * Descend to existing node.  (If in->allTheSame, the core code will
                                268                 :          * ignore our nodeN specification here, but that's OK.  We still have
                                269                 :          * to provide the correct levelAdd and restDatum values, and those are
                                270                 :          * the same regardless of which node gets chosen by core.)
                                271                 :          */
                                272                 :         int         levelAdd;
                                273                 : 
 4131 tgl                       274 GIC      104835 :         out->resultType = spgMatchNode;
 4131 tgl                       275 CBC      104835 :         out->result.matchNode.nodeN = i;
 3226                           276          104835 :         levelAdd = commonLen;
                                277          104835 :         if (nodeChar >= 0)
                                278          101261 :             levelAdd++;
                                279          104835 :         out->result.matchNode.levelAdd = levelAdd;
                                280          104835 :         if (inSize - in->level - levelAdd > 0)
 4131                           281          101258 :             out->result.matchNode.restDatum =
 3226                           282          101258 :                 formTextDatum(inStr + in->level + levelAdd,
                                283          101258 :                               inSize - in->level - levelAdd);
 4131 tgl                       284 ECB             :         else
 4131 tgl                       285 GIC        3577 :             out->result.matchNode.restDatum =
 4131 tgl                       286 CBC        3577 :                 formTextDatum(NULL, 0);
 4131 tgl                       287 ECB             :     }
 4131 tgl                       288 GIC         675 :     else if (in->allTheSame)
 4131 tgl                       289 ECB             :     {
                                290                 :         /*
                                291                 :          * Can't use AddNode action, so split the tuple.  The upper tuple has
                                292                 :          * the same prefix as before and uses a dummy node label -2 for the
                                293                 :          * lower tuple.  The lower tuple has no prefix and the same node
                                294                 :          * labels as the original tuple.
                                295                 :          *
                                296                 :          * Note: it might seem tempting to shorten the upper tuple's prefix,
                                297                 :          * if it has one, then use its last byte as label for the lower tuple.
                                298                 :          * But that doesn't win since we know the incoming value matches the
                                299                 :          * whole prefix: we'd just end up splitting the lower tuple again.
                                300                 :          */
 4131 tgl                       301 GIC           3 :         out->resultType = spgSplitTuple;
 4131 tgl                       302 CBC           3 :         out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
                                303               3 :         out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
 2420                           304               3 :         out->result.splitTuple.prefixNNodes = 1;
                                305               3 :         out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
                                306               3 :         out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
                                307               3 :         out->result.splitTuple.childNodeN = 0;
 4131                           308               3 :         out->result.splitTuple.postfixHasPrefix = false;
 4131 tgl                       309 ECB             :     }
                                310                 :     else
                                311                 :     {
                                312                 :         /* Add a node for the not-previously-seen nodeChar value */
 4131 tgl                       313 GIC         672 :         out->resultType = spgAddNode;
 3226 tgl                       314 CBC         672 :         out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
 4131                           315             672 :         out->result.addNode.nodeN = i;
 4131 tgl                       316 ECB             :     }
                                317                 : 
 4131 tgl                       318 GIC      105510 :     PG_RETURN_VOID();
 4131 tgl                       319 ECB             : }
                                320                 : 
                                321                 : /* qsort comparator to sort spgNodePtr structs by "c" */
                                322                 : static int
 4131 tgl                       323 GIC       57875 : cmpNodePtr(const void *a, const void *b)
 4131 tgl                       324 ECB             : {
 4131 tgl                       325 GIC       57875 :     const spgNodePtr *aa = (const spgNodePtr *) a;
 4131 tgl                       326 CBC       57875 :     const spgNodePtr *bb = (const spgNodePtr *) b;
 4131 tgl                       327 ECB             : 
 3226 tgl                       328 GIC       57875 :     return aa->c - bb->c;
 4131 tgl                       329 ECB             : }
                                330                 : 
                                331                 : Datum
 4131 tgl                       332 GIC         262 : spg_text_picksplit(PG_FUNCTION_ARGS)
 4131 tgl                       333 ECB             : {
 4131 tgl                       334 GIC         262 :     spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
 4131 tgl                       335 CBC         262 :     spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
                                336             262 :     text       *text0 = DatumGetTextPP(in->datums[0]);
 4131 tgl                       337 ECB             :     int         i,
                                338                 :                 commonLen;
                                339                 :     spgNodePtr *nodes;
                                340                 : 
                                341                 :     /* Identify longest common prefix, if any */
 4131 tgl                       342 GIC         262 :     commonLen = VARSIZE_ANY_EXHDR(text0);
 4131 tgl                       343 CBC        6473 :     for (i = 1; i < in->nTuples && commonLen > 0; i++)
 4131 tgl                       344 ECB             :     {
 4131 tgl                       345 GIC        6211 :         text       *texti = DatumGetTextPP(in->datums[i]);
 4131 tgl                       346 CBC       18633 :         int         tmp = commonPrefix(VARDATA_ANY(text0),
                                347            6211 :                                        VARDATA_ANY(texti),
                                348            6211 :                                        VARSIZE_ANY_EXHDR(text0),
                                349            6211 :                                        VARSIZE_ANY_EXHDR(texti));
 4131 tgl                       350 ECB             : 
 4131 tgl                       351 GIC        6211 :         if (tmp < commonLen)
 4131 tgl                       352 CBC         209 :             commonLen = tmp;
 4131 tgl                       353 ECB             :     }
                                354                 : 
                                355                 :     /*
                                356                 :      * Limit the prefix length, if necessary, to ensure that the resulting
                                357                 :      * inner tuple will fit on a page.
                                358                 :      */
 4131 tgl                       359 GIC         262 :     commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
 4131 tgl                       360 ECB             : 
                                361                 :     /* Set node prefix to be that string, if it's not empty */
 4131 tgl                       362 GIC         262 :     if (commonLen == 0)
 4131 tgl                       363 ECB             :     {
 4131 tgl                       364 GIC         219 :         out->hasPrefix = false;
 4131 tgl                       365 ECB             :     }
                                366                 :     else
                                367                 :     {
 4131 tgl                       368 GIC          43 :         out->hasPrefix = true;
 4131 tgl                       369 CBC          43 :         out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
 4131 tgl                       370 ECB             :     }
                                371                 : 
                                372                 :     /* Extract the node label (first non-common byte) from each value */
 4131 tgl                       373 GIC         262 :     nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);
 4131 tgl                       374 ECB             : 
 4131 tgl                       375 GIC       25683 :     for (i = 0; i < in->nTuples; i++)
 4131 tgl                       376 ECB             :     {
 4131 tgl                       377 GIC       25421 :         text       *texti = DatumGetTextPP(in->datums[i]);
 4131 tgl                       378 ECB             : 
 4131 tgl                       379 GIC       25421 :         if (commonLen < VARSIZE_ANY_EXHDR(texti))
 3226 tgl                       380 CBC       22042 :             nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
 4131 tgl                       381 ECB             :         else
 3226 tgl                       382 GIC        3379 :             nodes[i].c = -1;    /* use -1 if string is all common */
 4131 tgl                       383 CBC       25421 :         nodes[i].i = i;
                                384           25421 :         nodes[i].d = in->datums[i];
 4131 tgl                       385 ECB             :     }
                                386                 : 
                                387                 :     /*
                                388                 :      * Sort by label values so that we can group the values into nodes.  This
                                389                 :      * also ensures that the nodes are ordered by label value, allowing the
                                390                 :      * use of binary search in searchChar.
                                391                 :      */
 4131 tgl                       392 GIC         262 :     qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
 4131 tgl                       393 ECB             : 
                                394                 :     /* And emit results */
 4131 tgl                       395 GIC         262 :     out->nNodes = 0;
 4131 tgl                       396 CBC         262 :     out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
                                397             262 :     out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
                                398             262 :     out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
 4131 tgl                       399 ECB             : 
 4131 tgl                       400 GIC       25683 :     for (i = 0; i < in->nTuples; i++)
 4131 tgl                       401 ECB             :     {
 4131 tgl                       402 GIC       25421 :         text       *texti = DatumGetTextPP(nodes[i].d);
 4131 tgl                       403 ECB             :         Datum       leafD;
                                404                 : 
 4131 tgl                       405 GIC       25421 :         if (i == 0 || nodes[i].c != nodes[i - 1].c)
 4131 tgl                       406 ECB             :         {
 3226 tgl                       407 GIC        1609 :             out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
 4131 tgl                       408 CBC        1609 :             out->nNodes++;
 4131 tgl                       409 ECB             :         }
                                410                 : 
 4131 tgl                       411 GIC       25421 :         if (commonLen < VARSIZE_ANY_EXHDR(texti))
 4131 tgl                       412 CBC       22042 :             leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
                                413           22042 :                                   VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
 4131 tgl                       414 ECB             :         else
 4131 tgl                       415 GIC        3379 :             leafD = formTextDatum(NULL, 0);
 4131 tgl                       416 ECB             : 
 4131 tgl                       417 GIC       25421 :         out->leafTupleDatums[nodes[i].i] = leafD;
 4131 tgl                       418 CBC       25421 :         out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
 4131 tgl                       419 ECB             :     }
                                420                 : 
 4131 tgl                       421 GIC         262 :     PG_RETURN_VOID();
 4131 tgl                       422 ECB             : }
                                423                 : 
                                424                 : Datum
 4131 tgl                       425 GIC        1170 : spg_text_inner_consistent(PG_FUNCTION_ARGS)
 4131 tgl                       426 ECB             : {
 4131 tgl                       427 GIC        1170 :     spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
 4131 tgl                       428 CBC        1170 :     spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
 4047                           429            1170 :     bool        collate_is_c = lc_collate_is_c(PG_GET_COLLATION());
 2654 tgl                       430 ECB             :     text       *reconstructedValue;
                                431                 :     text       *reconstrText;
                                432                 :     int         maxReconstrLen;
 4131 tgl                       433 GIC        1170 :     text       *prefixText = NULL;
 4131 tgl                       434 CBC        1170 :     int         prefixSize = 0;
 4047 tgl                       435 ECB             :     int         i;
                                436                 : 
                                437                 :     /*
                                438                 :      * Reconstruct values represented at this tuple, including parent data,
                                439                 :      * prefix of this tuple if any, and the node label if it's non-dummy.
                                440                 :      * in->level should be the length of the previously reconstructed value,
                                441                 :      * and the number of bytes added here is prefixSize or prefixSize + 1.
                                442                 :      *
                                443                 :      * Note: we assume that in->reconstructedValue isn't toasted and doesn't
                                444                 :      * have a short varlena header.  This is okay because it must have been
                                445                 :      * created by a previous invocation of this routine, and we always emit
                                446                 :      * long-format reconstructed values.
                                447                 :      */
 2654 tgl                       448 GIC        1170 :     reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
 2654 tgl                       449 CBC        1170 :     Assert(reconstructedValue == NULL ? in->level == 0 :
 2654 tgl                       450 ECB             :            VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
                                451                 : 
 4131 tgl                       452 GIC        1170 :     maxReconstrLen = in->level + 1;
 4131 tgl                       453 CBC        1170 :     if (in->hasPrefix)
 4131 tgl                       454 ECB             :     {
 4131 tgl                       455 GIC         234 :         prefixText = DatumGetTextPP(in->prefixDatum);
 4131 tgl                       456 CBC         234 :         prefixSize = VARSIZE_ANY_EXHDR(prefixText);
                                457             234 :         maxReconstrLen += prefixSize;
 4131 tgl                       458 ECB             :     }
                                459                 : 
 4131 tgl                       460 GIC        1170 :     reconstrText = palloc(VARHDRSZ + maxReconstrLen);
 4131 tgl                       461 CBC        1170 :     SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
 4131 tgl                       462 ECB             : 
 4131 tgl                       463 GIC        1170 :     if (in->level)
 4131 tgl                       464 CBC        1080 :         memcpy(VARDATA(reconstrText),
 2654                           465            1080 :                VARDATA(reconstructedValue),
 4131                           466            1080 :                in->level);
                                467            1170 :     if (prefixSize)
                                468             234 :         memcpy(((char *) VARDATA(reconstrText)) + in->level,
                                469             234 :                VARDATA_ANY(prefixText),
 4131 tgl                       470 ECB             :                prefixSize);
                                471                 :     /* last byte of reconstrText will be filled in below */
                                472                 : 
                                473                 :     /*
                                474                 :      * Scan the child nodes.  For each one, complete the reconstructed value
                                475                 :      * and see if it's consistent with the query.  If so, emit an entry into
                                476                 :      * the output arrays.
                                477                 :      */
 4131 tgl                       478 GIC        1170 :     out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
 4131 tgl                       479 CBC        1170 :     out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
                                480            1170 :     out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
                                481            1170 :     out->nNodes = 0;
 4131 tgl                       482 ECB             : 
 4131 tgl                       483 GIC       11664 :     for (i = 0; i < in->nNodes; i++)
 4131 tgl                       484 ECB             :     {
 3226 tgl                       485 GIC       10494 :         int16       nodeChar = DatumGetInt16(in->nodeLabels[i]);
 4131 tgl                       486 ECB             :         int         thisLen;
 4047 tgl                       487 GIC       10494 :         bool        res = true;
 4047 tgl                       488 ECB             :         int         j;
                                489                 : 
                                490                 :         /* If nodeChar is a dummy value, don't include it in data */
 3226 tgl                       491 GIC       10494 :         if (nodeChar <= 0)
 4131 tgl                       492 CBC        2382 :             thisLen = maxReconstrLen - 1;
 4131 tgl                       493 ECB             :         else
                                494                 :         {
 3226 tgl                       495 GIC        8112 :             ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
 4131 tgl                       496 CBC        8112 :             thisLen = maxReconstrLen;
 4131 tgl                       497 ECB             :         }
                                498                 : 
 4047 tgl                       499 GIC       18942 :         for (j = 0; j < in->nkeys; j++)
 4131 tgl                       500 ECB             :         {
 4047 tgl                       501 GIC       10494 :             StrategyNumber strategy = in->scankeys[j].sk_strategy;
 4047 tgl                       502 ECB             :             text       *inText;
                                503                 :             int         inSize;
                                504                 :             int         r;
                                505                 : 
                                506                 :             /*
                                507                 :              * If it's a collation-aware operator, but the collation is C, we
                                508                 :              * can treat it as non-collation-aware.  With non-C collation we
                                509                 :              * need to traverse whole tree :-( so there's no point in making
                                510                 :              * any check here.  (Note also that our reconstructed value may
                                511                 :              * well end with a partial multibyte character, so that applying
                                512                 :              * any encoding-sensitive test to it would be risky anyhow.)
                                513                 :              */
 1832 teodor                    514 GIC       10494 :             if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
 4047 tgl                       515 ECB             :             {
 4047 tgl                       516 GIC        7352 :                 if (collate_is_c)
 1832 teodor                    517 LBC           0 :                     strategy -= SPG_STRATEGY_ADDITION;
 4047 tgl                       518 EUB             :                 else
 4047 tgl                       519 GIC        7352 :                     continue;
 4047 tgl                       520 ECB             :             }
                                521                 : 
 4047 tgl                       522 GIC        3142 :             inText = DatumGetTextPP(in->scankeys[j].sk_argument);
 4047 tgl                       523 CBC        3142 :             inSize = VARSIZE_ANY_EXHDR(inText);
 4047 tgl                       524 ECB             : 
 4047 tgl                       525 GIC        3142 :             r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
 4047 tgl                       526 CBC        3142 :                        Min(inSize, thisLen));
 4047 tgl                       527 ECB             : 
 4047 tgl                       528 GIC        3142 :             switch (strategy)
 4047 tgl                       529 ECB             :             {
 4047 tgl                       530 GIC         528 :                 case BTLessStrategyNumber:
 4047 tgl                       531 ECB             :                 case BTLessEqualStrategyNumber:
 4047 tgl                       532 GIC         528 :                     if (r > 0)
 4047 tgl                       533 CBC         300 :                         res = false;
                                534             528 :                     break;
                                535            1798 :                 case BTEqualStrategyNumber:
                                536            1798 :                     if (r != 0 || inSize < thisLen)
                                537            1050 :                         res = false;
                                538            1798 :                     break;
                                539             408 :                 case BTGreaterEqualStrategyNumber:
 4047 tgl                       540 ECB             :                 case BTGreaterStrategyNumber:
 4047 tgl                       541 GIC         408 :                     if (r < 0)
 4047 tgl                       542 CBC         312 :                         res = false;
                                543             408 :                     break;
 1832 teodor                    544             408 :                 case RTPrefixStrategyNumber:
                                545             408 :                     if (r != 0)
                                546             384 :                         res = false;
                                547             408 :                     break;
 4047 tgl                       548 LBC           0 :                 default:
 4047 tgl                       549 UBC           0 :                     elog(ERROR, "unrecognized strategy number: %d",
 4047 tgl                       550 EUB             :                          in->scankeys[j].sk_strategy);
                                551                 :                     break;
                                552                 :             }
                                553                 : 
 4047 tgl                       554 GIC        3142 :             if (!res)
 4047 tgl                       555 CBC        2046 :                 break;          /* no need to consider remaining conditions */
 4131 tgl                       556 ECB             :         }
                                557                 : 
 4131 tgl                       558 GIC       10494 :         if (res)
 4131 tgl                       559 ECB             :         {
 4131 tgl                       560 GIC        8448 :             out->nodeNumbers[out->nNodes] = i;
 4131 tgl                       561 CBC        8448 :             out->levelAdds[out->nNodes] = thisLen - in->level;
                                562            8448 :             SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
                                563           16896 :             out->reconstructedValues[out->nNodes] =
                                564            8448 :                 datumCopy(PointerGetDatum(reconstrText), false, -1);
                                565            8448 :             out->nNodes++;
 4131 tgl                       566 ECB             :         }
                                567                 :     }
                                568                 : 
 4131 tgl                       569 GIC        1170 :     PG_RETURN_VOID();
 4131 tgl                       570 ECB             : }
                                571                 : 
                                572                 : Datum
 4131 tgl                       573 GIC      166410 : spg_text_leaf_consistent(PG_FUNCTION_ARGS)
 4131 tgl                       574 ECB             : {
 4131 tgl                       575 GIC      166410 :     spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
 4131 tgl                       576 CBC      166410 :     spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
                                577          166410 :     int         level = in->level;
 4131 tgl                       578 ECB             :     text       *leafValue,
 4131 tgl                       579 GIC      166410 :                *reconstrValue = NULL;
 4131 tgl                       580 ECB             :     char       *fullValue;
                                581                 :     int         fullLen;
                                582                 :     bool        res;
                                583                 :     int         j;
                                584                 : 
                                585                 :     /* all tests are exact */
 4131 tgl                       586 GIC      166410 :     out->recheck = false;
 4131 tgl                       587 ECB             : 
 4131 tgl                       588 GIC      166410 :     leafValue = DatumGetTextPP(in->leafDatum);
 4131 tgl                       589 ECB             : 
                                590                 :     /* As above, in->reconstructedValue isn't toasted or short. */
 4131 tgl                       591 GIC      166410 :     if (DatumGetPointer(in->reconstructedValue))
 2219 noah                      592 CBC      166398 :         reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
 4131 tgl                       593 ECB             : 
 2654 tgl                       594 GIC      166410 :     Assert(reconstrValue == NULL ? level == 0 :
 4131 tgl                       595 ECB             :            VARSIZE_ANY_EXHDR(reconstrValue) == level);
                                596                 : 
                                597                 :     /* Reconstruct the full string represented by this leaf tuple */
 4131 tgl                       598 GIC      166410 :     fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
 4131 tgl                       599 CBC      166410 :     if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
 4131 tgl                       600 ECB             :     {
 4131 tgl                       601 GIC       49752 :         fullValue = VARDATA(reconstrValue);
 4129 tgl                       602 CBC       49752 :         out->leafValue = PointerGetDatum(reconstrValue);
 4131 tgl                       603 ECB             :     }
                                604                 :     else
                                605                 :     {
 3955 bruce                     606 GIC      116658 :         text       *fullText = palloc(VARHDRSZ + fullLen);
 4129 tgl                       607 ECB             : 
 4129 tgl                       608 GIC      116658 :         SET_VARSIZE(fullText, VARHDRSZ + fullLen);
 4129 tgl                       609 CBC      116658 :         fullValue = VARDATA(fullText);
 4131                           610          116658 :         if (level)
                                611          116646 :             memcpy(fullValue, VARDATA(reconstrValue), level);
                                612          116658 :         if (VARSIZE_ANY_EXHDR(leafValue) > 0)
                                613          116658 :             memcpy(fullValue + level, VARDATA_ANY(leafValue),
                                614          116658 :                    VARSIZE_ANY_EXHDR(leafValue));
 4129                           615          116658 :         out->leafValue = PointerGetDatum(fullText);
 4131 tgl                       616 ECB             :     }
                                617                 : 
                                618                 :     /* Perform the required comparison(s) */
 4047 tgl                       619 GIC      166410 :     res = true;
 4047 tgl                       620 CBC      180183 :     for (j = 0; j < in->nkeys; j++)
 4131 tgl                       621 ECB             :     {
 4047 tgl                       622 GIC      166410 :         StrategyNumber strategy = in->scankeys[j].sk_strategy;
 4047 tgl                       623 CBC      166410 :         text       *query = DatumGetTextPP(in->scankeys[j].sk_argument);
                                624          166410 :         int         queryLen = VARSIZE_ANY_EXHDR(query);
 4047 tgl                       625 ECB             :         int         r;
                                626                 : 
 1832 teodor                    627 GIC      166410 :         if (strategy == RTPrefixStrategyNumber)
 1832 teodor                    628 ECB             :         {
                                629                 :             /*
                                630                 :              * if level >= length of query then reconstrValue must begin with
                                631                 :              * query (prefix) string, so we don't need to check it again.
                                632                 :              */
 1832 teodor                    633 GIC         384 :             res = (level >= queryLen) ||
 1479 peter                     634 CBC         192 :                 DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
 1479 peter                     635 ECB             :                                                      PG_GET_COLLATION(),
                                636                 :                                                      out->leafValue,
                                637                 :                                                      PointerGetDatum(query)));
                                638                 : 
 1819 tgl                       639 GIC         192 :             if (!res)           /* no need to consider remaining conditions */
 1832 teodor                    640 CBC         168 :                 break;
 1832 teodor                    641 ECB             : 
 1832 teodor                    642 GIC          24 :             continue;
 1832 teodor                    643 ECB             :         }
                                644                 : 
 1832 teodor                    645 GIC      166218 :         if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
 4047 tgl                       646 ECB             :         {
                                647                 :             /* Collation-aware comparison */
 1832 teodor                    648 GIC      150024 :             strategy -= SPG_STRATEGY_ADDITION;
 4131 tgl                       649 ECB             : 
                                650                 :             /* If asserts enabled, verify encoding of reconstructed string */
 4047 tgl                       651 GIC      150024 :             Assert(pg_verifymbstr(fullValue, fullLen, false));
 4131 tgl                       652 ECB             : 
 1819 tgl                       653 GIC      150024 :             r = varstr_cmp(fullValue, fullLen,
 1819 tgl                       654 CBC      150024 :                            VARDATA_ANY(query), queryLen,
 4047 tgl                       655 ECB             :                            PG_GET_COLLATION());
                                656                 :         }
                                657                 :         else
                                658                 :         {
                                659                 :             /* Non-collation-aware comparison */
 4047 tgl                       660 GIC       16194 :             r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
 4131 tgl                       661 ECB             : 
 1819 tgl                       662 GIC       16194 :             if (r == 0)
 1819 tgl                       663 ECB             :             {
 1819 tgl                       664 GIC       12081 :                 if (queryLen > fullLen)
 1819 tgl                       665 CBC        6012 :                     r = -1;
                                666            6069 :                 else if (queryLen < fullLen)
 1819 tgl                       667 LBC           0 :                     r = 1;
 1819 tgl                       668 EUB             :             }
                                669                 :         }
                                670                 : 
 4047 tgl                       671 GIC      166218 :         switch (strategy)
 4047 tgl                       672 ECB             :         {
 4047 tgl                       673 GIC       39144 :             case BTLessStrategyNumber:
 4047 tgl                       674 CBC       39144 :                 res = (r < 0);
                                675           39144 :                 break;
                                676           39144 :             case BTLessEqualStrategyNumber:
                                677           39144 :                 res = (r <= 0);
                                678           39144 :                 break;
                                679           12150 :             case BTEqualStrategyNumber:
                                680           12150 :                 res = (r == 0);
                                681           12150 :                 break;
                                682           37890 :             case BTGreaterEqualStrategyNumber:
                                683           37890 :                 res = (r >= 0);
                                684           37890 :                 break;
                                685           37890 :             case BTGreaterStrategyNumber:
                                686           37890 :                 res = (r > 0);
                                687           37890 :                 break;
 4047 tgl                       688 LBC           0 :             default:
 4047 tgl                       689 UBC           0 :                 elog(ERROR, "unrecognized strategy number: %d",
 4047 tgl                       690 EUB             :                      in->scankeys[j].sk_strategy);
                                691                 :                 res = false;
                                692                 :                 break;
                                693                 :         }
                                694                 : 
 4047 tgl                       695 GIC      166218 :         if (!res)
 4047 tgl                       696 CBC      152469 :             break;              /* no need to consider remaining conditions */
 4131 tgl                       697 ECB             :     }
                                698                 : 
 4131 tgl                       699 GIC      166410 :     PG_RETURN_BOOL(res);
 4131 tgl                       700 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a