LCOV - differential code coverage report
Current view: top level - src/backend/access/spgist - spgtextproc.c (source / functions) Coverage Total Hit LBC UIC UBC GIC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 97.3 % 291 283 4 1 3 134 149 5 130
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 9 9 9 9
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * spgtextproc.c
       4                 :  *    implementation of radix tree (compressed trie) over text
       5                 :  *
       6                 :  * In a text_ops SPGiST index, inner tuples can have a prefix which is the
       7                 :  * common prefix of all strings indexed under that tuple.  The node labels
       8                 :  * represent the next byte of the string(s) after the prefix.  Assuming we
       9                 :  * always use the longest possible prefix, we will get more than one node
      10                 :  * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH.
      11                 :  *
      12                 :  * To reconstruct the indexed string for any index entry, concatenate the
      13                 :  * inner-tuple prefixes and node labels starting at the root and working
      14                 :  * down to the leaf entry, then append the datum in the leaf entry.
      15                 :  * (While descending the tree, "level" is the number of bytes reconstructed
      16                 :  * so far.)
      17                 :  *
      18                 :  * However, there are two special cases for node labels: -1 indicates that
      19                 :  * there are no more bytes after the prefix-so-far, and -2 indicates that we
      20                 :  * had to split an existing allTheSame tuple (in such a case we have to create
      21                 :  * a node label that doesn't correspond to any string byte).  In either case,
      22                 :  * the node label does not contribute anything to the reconstructed string.
      23                 :  *
      24                 :  * Previously, we used a node label of zero for both special cases, but
      25                 :  * this was problematic because one can't tell whether a string ending at
      26                 :  * the current level can be pushed down into such a child node.  For
      27                 :  * backwards compatibility, we still support such node labels for reading;
      28                 :  * but no new entries will ever be pushed down into a zero-labeled child.
      29                 :  * No new entries ever get pushed into a -2-labeled child, either.
      30                 :  *
      31                 :  *
      32                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      33                 :  * Portions Copyright (c) 1994, Regents of the University of California
      34                 :  *
      35                 :  * IDENTIFICATION
      36                 :  *          src/backend/access/spgist/spgtextproc.c
      37                 :  *
      38                 :  *-------------------------------------------------------------------------
      39                 :  */
      40                 : #include "postgres.h"
      41                 : 
      42                 : #include "access/spgist.h"
      43                 : #include "catalog/pg_type.h"
      44                 : #include "mb/pg_wchar.h"
      45                 : #include "utils/builtins.h"
      46                 : #include "utils/datum.h"
      47                 : #include "utils/pg_locale.h"
      48                 : #include "utils/varlena.h"
      49                 : #include "varatt.h"
      50                 : 
      51                 : 
      52                 : /*
      53                 :  * In the worst case, an inner tuple in a text radix tree could have as many
      54                 :  * as 258 nodes (one for each possible byte value, plus the two special
      55                 :  * cases).  Each node can take 16 bytes on MAXALIGN=8 machines.  The inner
      56                 :  * tuple must fit on an index page of size BLCKSZ.  Rather than assuming we
      57                 :  * know the exact amount of overhead imposed by page headers, tuple headers,
      58                 :  * etc, we leave 100 bytes for that (the actual overhead should be no more
      59                 :  * than 56 bytes at this writing, so there is slop in this number).
      60                 :  * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long.
      61                 :  * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length
      62                 :  * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner
      63                 :  * tuple size exceeds maximum" if there are too many distinct next-byte values
      64                 :  * at a given place in the tree.  Since use of nonstandard block sizes appears
      65                 :  * to be negligible in the field, we just live with that fact for now,
      66                 :  * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller
      67                 :  * than default.
      68                 :  */
      69                 : #define SPGIST_MAX_PREFIX_LENGTH    Max((int) (BLCKSZ - 258 * 16 - 100), 32)
      70                 : 
      71                 : /*
      72                 :  * Strategy for collation aware operator on text is equal to btree strategy
      73                 :  * plus value of 10.
      74                 :  *
      75                 :  * Current collation aware strategies and their corresponding btree strategies:
      76                 :  * 11 BTLessStrategyNumber
      77                 :  * 12 BTLessEqualStrategyNumber
      78                 :  * 14 BTGreaterEqualStrategyNumber
      79                 :  * 15 BTGreaterStrategyNumber
      80                 :  */
      81                 : #define SPG_STRATEGY_ADDITION   (10)
      82                 : #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \
      83                 :                                          && (s) != RTPrefixStrategyNumber)
      84                 : 
      85                 : /* Struct for sorting values in picksplit */
      86                 : typedef struct spgNodePtr
      87                 : {
      88                 :     Datum       d;
      89                 :     int         i;
      90                 :     int16       c;
      91                 : } spgNodePtr;
      92                 : 
      93                 : 
      94                 : Datum
      95 GIC          32 : spg_text_config(PG_FUNCTION_ARGS)
      96 ECB             : {
      97                 :     /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
      98 GIC          32 :     spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
      99 ECB             : 
     100 GIC          32 :     cfg->prefixType = TEXTOID;
     101 CBC          32 :     cfg->labelType = INT2OID;
     102              32 :     cfg->canReturnData = true;
     103              32 :     cfg->longValuesOK = true;    /* suffixing will shorten long values */
     104              32 :     PG_RETURN_VOID();
     105 ECB             : }
     106                 : 
     107                 : /*
     108                 :  * Form a text datum from the given not-necessarily-null-terminated string,
     109                 :  * using short varlena header format if possible
     110                 :  */
     111                 : static Datum
     112 GIC      130608 : formTextDatum(const char *data, int datalen)
     113 ECB             : {
     114                 :     char       *p;
     115                 : 
     116 GIC      130608 :     p = (char *) palloc(datalen + VARHDRSZ);
     117 ECB             : 
     118 GIC      130608 :     if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX)
     119 ECB             :     {
     120 GIC      130608 :         SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT);
     121 CBC      130608 :         if (datalen)
     122          122789 :             memcpy(p + VARHDRSZ_SHORT, data, datalen);
     123 ECB             :     }
     124                 :     else
     125                 :     {
     126 UIC           0 :         SET_VARSIZE(p, datalen + VARHDRSZ);
     127 UBC           0 :         memcpy(p + VARHDRSZ, data, datalen);
     128 EUB             :     }
     129                 : 
     130 GIC      130608 :     return PointerGetDatum(p);
     131 ECB             : }
     132                 : 
     133                 : /*
     134                 :  * Find the length of the common prefix of a and b
     135                 :  */
     136                 : static int
     137 GIC       48123 : commonPrefix(const char *a, const char *b, int lena, int lenb)
     138 ECB             : {
     139 GIC       48123 :     int         i = 0;
     140 ECB             : 
     141 GIC     3379944 :     while (i < lena && i < lenb && *a == *b)
     142 ECB             :     {
     143 GIC     3331821 :         a++;
     144 CBC     3331821 :         b++;
     145         3331821 :         i++;
     146 ECB             :     }
     147                 : 
     148 GIC       48123 :     return i;
     149 ECB             : }
     150                 : 
     151                 : /*
     152                 :  * Binary search an array of int16 datums for a match to c
     153                 :  *
     154                 :  * On success, *i gets the match location; on failure, it gets where to insert
     155                 :  */
     156                 : static bool
     157 GIC      105510 : searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i)
     158 ECB             : {
     159 GIC      105510 :     int         StopLow = 0,
     160 CBC      105510 :                 StopHigh = nNodes;
     161 ECB             : 
     162 GIC      290633 :     while (StopLow < StopHigh)
     163 ECB             :     {
     164 GIC      289958 :         int         StopMiddle = (StopLow + StopHigh) >> 1;
     165 CBC      289958 :         int16       middle = DatumGetInt16(nodeLabels[StopMiddle]);
     166 ECB             : 
     167 GIC      289958 :         if (c < middle)
     168 CBC       92221 :             StopHigh = StopMiddle;
     169          197737 :         else if (c > middle)
     170           92902 :             StopLow = StopMiddle + 1;
     171 ECB             :         else
     172                 :         {
     173 GIC      104835 :             *i = StopMiddle;
     174 CBC      104835 :             return true;
     175 ECB             :         }
     176                 :     }
     177                 : 
     178 GIC         675 :     *i = StopHigh;
     179 CBC         675 :     return false;
     180 ECB             : }
     181                 : 
     182                 : Datum
     183 GIC      105822 : spg_text_choose(PG_FUNCTION_ARGS)
     184 ECB             : {
     185 GIC      105822 :     spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
     186 CBC      105822 :     spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
     187          105822 :     text       *inText = DatumGetTextPP(in->datum);
     188          105822 :     char       *inStr = VARDATA_ANY(inText);
     189          105822 :     int         inSize = VARSIZE_ANY_EXHDR(inText);
     190          105822 :     char       *prefixStr = NULL;
     191          105822 :     int         prefixSize = 0;
     192          105822 :     int         commonLen = 0;
     193          105822 :     int16       nodeChar = 0;
     194          105822 :     int         i = 0;
     195 ECB             : 
     196                 :     /* Check for prefix match, set nodeChar to first byte after prefix */
     197 GIC      105822 :     if (in->hasPrefix)
     198 ECB             :     {
     199 GIC       41912 :         text       *prefixText = DatumGetTextPP(in->prefixDatum);
     200 ECB             : 
     201 GIC       41912 :         prefixStr = VARDATA_ANY(prefixText);
     202 CBC       41912 :         prefixSize = VARSIZE_ANY_EXHDR(prefixText);
     203 ECB             : 
     204 GIC       41912 :         commonLen = commonPrefix(inStr + in->level,
     205 ECB             :                                  prefixStr,
     206 GIC       41912 :                                  inSize - in->level,
     207 ECB             :                                  prefixSize);
     208                 : 
     209 GIC       41912 :         if (commonLen == prefixSize)
     210 ECB             :         {
     211 GIC       41600 :             if (inSize - in->level > commonLen)
     212 CBC       38522 :                 nodeChar = *(unsigned char *) (inStr + in->level + commonLen);
     213 ECB             :             else
     214 GIC        3078 :                 nodeChar = -1;
     215 ECB             :         }
     216                 :         else
     217                 :         {
     218                 :             /* Must split tuple because incoming value doesn't match prefix */
     219 GIC         312 :             out->resultType = spgSplitTuple;
     220 ECB             : 
     221 GIC         312 :             if (commonLen == 0)
     222 ECB             :             {
     223 GIC           9 :                 out->result.splitTuple.prefixHasPrefix = false;
     224 ECB             :             }
     225                 :             else
     226                 :             {
     227 GIC         303 :                 out->result.splitTuple.prefixHasPrefix = true;
     228 CBC         303 :                 out->result.splitTuple.prefixPrefixDatum =
     229             303 :                     formTextDatum(prefixStr, commonLen);
     230 ECB             :             }
     231 GIC         312 :             out->result.splitTuple.prefixNNodes = 1;
     232 CBC         312 :             out->result.splitTuple.prefixNodeLabels =
     233             312 :                 (Datum *) palloc(sizeof(Datum));
     234             624 :             out->result.splitTuple.prefixNodeLabels[0] =
     235             312 :                 Int16GetDatum(*(unsigned char *) (prefixStr + commonLen));
     236 ECB             : 
     237 GIC         312 :             out->result.splitTuple.childNodeN = 0;
     238 ECB             : 
     239 GIC         312 :             if (prefixSize - commonLen == 1)
     240 ECB             :             {
     241 GIC         306 :                 out->result.splitTuple.postfixHasPrefix = false;
     242 ECB             :             }
     243                 :             else
     244                 :             {
     245 GIC           6 :                 out->result.splitTuple.postfixHasPrefix = true;
     246 CBC           6 :                 out->result.splitTuple.postfixPrefixDatum =
     247               6 :                     formTextDatum(prefixStr + commonLen + 1,
     248               6 :                                   prefixSize - commonLen - 1);
     249 ECB             :             }
     250                 : 
     251 GIC         312 :             PG_RETURN_VOID();
     252 ECB             :         }
     253                 :     }
     254 GIC       63910 :     else if (inSize > in->level)
     255 ECB             :     {
     256 GIC       63411 :         nodeChar = *(unsigned char *) (inStr + in->level);
     257 ECB             :     }
     258                 :     else
     259                 :     {
     260 GIC         499 :         nodeChar = -1;
     261 ECB             :     }
     262                 : 
     263                 :     /* Look up nodeChar in the node label array */
     264 GIC      105510 :     if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i))
     265 ECB             :     {
     266                 :         /*
     267                 :          * Descend to existing node.  (If in->allTheSame, the core code will
     268                 :          * ignore our nodeN specification here, but that's OK.  We still have
     269                 :          * to provide the correct levelAdd and restDatum values, and those are
     270                 :          * the same regardless of which node gets chosen by core.)
     271                 :          */
     272                 :         int         levelAdd;
     273                 : 
     274 GIC      104835 :         out->resultType = spgMatchNode;
     275 CBC      104835 :         out->result.matchNode.nodeN = i;
     276          104835 :         levelAdd = commonLen;
     277          104835 :         if (nodeChar >= 0)
     278          101261 :             levelAdd++;
     279          104835 :         out->result.matchNode.levelAdd = levelAdd;
     280          104835 :         if (inSize - in->level - levelAdd > 0)
     281          101258 :             out->result.matchNode.restDatum =
     282          101258 :                 formTextDatum(inStr + in->level + levelAdd,
     283          101258 :                               inSize - in->level - levelAdd);
     284 ECB             :         else
     285 GIC        3577 :             out->result.matchNode.restDatum =
     286 CBC        3577 :                 formTextDatum(NULL, 0);
     287 ECB             :     }
     288 GIC         675 :     else if (in->allTheSame)
     289 ECB             :     {
     290                 :         /*
     291                 :          * Can't use AddNode action, so split the tuple.  The upper tuple has
     292                 :          * the same prefix as before and uses a dummy node label -2 for the
     293                 :          * lower tuple.  The lower tuple has no prefix and the same node
     294                 :          * labels as the original tuple.
     295                 :          *
     296                 :          * Note: it might seem tempting to shorten the upper tuple's prefix,
     297                 :          * if it has one, then use its last byte as label for the lower tuple.
     298                 :          * But that doesn't win since we know the incoming value matches the
     299                 :          * whole prefix: we'd just end up splitting the lower tuple again.
     300                 :          */
     301 GIC           3 :         out->resultType = spgSplitTuple;
     302 CBC           3 :         out->result.splitTuple.prefixHasPrefix = in->hasPrefix;
     303               3 :         out->result.splitTuple.prefixPrefixDatum = in->prefixDatum;
     304               3 :         out->result.splitTuple.prefixNNodes = 1;
     305               3 :         out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum));
     306               3 :         out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2);
     307               3 :         out->result.splitTuple.childNodeN = 0;
     308               3 :         out->result.splitTuple.postfixHasPrefix = false;
     309 ECB             :     }
     310                 :     else
     311                 :     {
     312                 :         /* Add a node for the not-previously-seen nodeChar value */
     313 GIC         672 :         out->resultType = spgAddNode;
     314 CBC         672 :         out->result.addNode.nodeLabel = Int16GetDatum(nodeChar);
     315             672 :         out->result.addNode.nodeN = i;
     316 ECB             :     }
     317                 : 
     318 GIC      105510 :     PG_RETURN_VOID();
     319 ECB             : }
     320                 : 
     321                 : /* qsort comparator to sort spgNodePtr structs by "c" */
     322                 : static int
     323 GIC       57875 : cmpNodePtr(const void *a, const void *b)
     324 ECB             : {
     325 GIC       57875 :     const spgNodePtr *aa = (const spgNodePtr *) a;
     326 CBC       57875 :     const spgNodePtr *bb = (const spgNodePtr *) b;
     327 ECB             : 
     328 GIC       57875 :     return aa->c - bb->c;
     329 ECB             : }
     330                 : 
     331                 : Datum
     332 GIC         262 : spg_text_picksplit(PG_FUNCTION_ARGS)
     333 ECB             : {
     334 GIC         262 :     spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
     335 CBC         262 :     spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
     336             262 :     text       *text0 = DatumGetTextPP(in->datums[0]);
     337 ECB             :     int         i,
     338                 :                 commonLen;
     339                 :     spgNodePtr *nodes;
     340                 : 
     341                 :     /* Identify longest common prefix, if any */
     342 GIC         262 :     commonLen = VARSIZE_ANY_EXHDR(text0);
     343 CBC        6473 :     for (i = 1; i < in->nTuples && commonLen > 0; i++)
     344 ECB             :     {
     345 GIC        6211 :         text       *texti = DatumGetTextPP(in->datums[i]);
     346 CBC       18633 :         int         tmp = commonPrefix(VARDATA_ANY(text0),
     347            6211 :                                        VARDATA_ANY(texti),
     348            6211 :                                        VARSIZE_ANY_EXHDR(text0),
     349            6211 :                                        VARSIZE_ANY_EXHDR(texti));
     350 ECB             : 
     351 GIC        6211 :         if (tmp < commonLen)
     352 CBC         209 :             commonLen = tmp;
     353 ECB             :     }
     354                 : 
     355                 :     /*
     356                 :      * Limit the prefix length, if necessary, to ensure that the resulting
     357                 :      * inner tuple will fit on a page.
     358                 :      */
     359 GIC         262 :     commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH);
     360 ECB             : 
     361                 :     /* Set node prefix to be that string, if it's not empty */
     362 GIC         262 :     if (commonLen == 0)
     363 ECB             :     {
     364 GIC         219 :         out->hasPrefix = false;
     365 ECB             :     }
     366                 :     else
     367                 :     {
     368 GIC          43 :         out->hasPrefix = true;
     369 CBC          43 :         out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen);
     370 ECB             :     }
     371                 : 
     372                 :     /* Extract the node label (first non-common byte) from each value */
     373 GIC         262 :     nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples);
     374 ECB             : 
     375 GIC       25683 :     for (i = 0; i < in->nTuples; i++)
     376 ECB             :     {
     377 GIC       25421 :         text       *texti = DatumGetTextPP(in->datums[i]);
     378 ECB             : 
     379 GIC       25421 :         if (commonLen < VARSIZE_ANY_EXHDR(texti))
     380 CBC       22042 :             nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen);
     381 ECB             :         else
     382 GIC        3379 :             nodes[i].c = -1;    /* use -1 if string is all common */
     383 CBC       25421 :         nodes[i].i = i;
     384           25421 :         nodes[i].d = in->datums[i];
     385 ECB             :     }
     386                 : 
     387                 :     /*
     388                 :      * Sort by label values so that we can group the values into nodes.  This
     389                 :      * also ensures that the nodes are ordered by label value, allowing the
     390                 :      * use of binary search in searchChar.
     391                 :      */
     392 GIC         262 :     qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr);
     393 ECB             : 
     394                 :     /* And emit results */
     395 GIC         262 :     out->nNodes = 0;
     396 CBC         262 :     out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples);
     397             262 :     out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
     398             262 :     out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
     399 ECB             : 
     400 GIC       25683 :     for (i = 0; i < in->nTuples; i++)
     401 ECB             :     {
     402 GIC       25421 :         text       *texti = DatumGetTextPP(nodes[i].d);
     403 ECB             :         Datum       leafD;
     404                 : 
     405 GIC       25421 :         if (i == 0 || nodes[i].c != nodes[i - 1].c)
     406 ECB             :         {
     407 GIC        1609 :             out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c);
     408 CBC        1609 :             out->nNodes++;
     409 ECB             :         }
     410                 : 
     411 GIC       25421 :         if (commonLen < VARSIZE_ANY_EXHDR(texti))
     412 CBC       22042 :             leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1,
     413           22042 :                                   VARSIZE_ANY_EXHDR(texti) - commonLen - 1);
     414 ECB             :         else
     415 GIC        3379 :             leafD = formTextDatum(NULL, 0);
     416 ECB             : 
     417 GIC       25421 :         out->leafTupleDatums[nodes[i].i] = leafD;
     418 CBC       25421 :         out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1;
     419 ECB             :     }
     420                 : 
     421 GIC         262 :     PG_RETURN_VOID();
     422 ECB             : }
     423                 : 
     424                 : Datum
     425 GIC        1170 : spg_text_inner_consistent(PG_FUNCTION_ARGS)
     426 ECB             : {
     427 GIC        1170 :     spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
     428 CBC        1170 :     spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
     429            1170 :     bool        collate_is_c = lc_collate_is_c(PG_GET_COLLATION());
     430 ECB             :     text       *reconstructedValue;
     431                 :     text       *reconstrText;
     432                 :     int         maxReconstrLen;
     433 GIC        1170 :     text       *prefixText = NULL;
     434 CBC        1170 :     int         prefixSize = 0;
     435 ECB             :     int         i;
     436                 : 
     437                 :     /*
     438                 :      * Reconstruct values represented at this tuple, including parent data,
     439                 :      * prefix of this tuple if any, and the node label if it's non-dummy.
     440                 :      * in->level should be the length of the previously reconstructed value,
     441                 :      * and the number of bytes added here is prefixSize or prefixSize + 1.
     442                 :      *
     443                 :      * Note: we assume that in->reconstructedValue isn't toasted and doesn't
     444                 :      * have a short varlena header.  This is okay because it must have been
     445                 :      * created by a previous invocation of this routine, and we always emit
     446                 :      * long-format reconstructed values.
     447                 :      */
     448 GIC        1170 :     reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue);
     449 CBC        1170 :     Assert(reconstructedValue == NULL ? in->level == 0 :
     450 ECB             :            VARSIZE_ANY_EXHDR(reconstructedValue) == in->level);
     451                 : 
     452 GIC        1170 :     maxReconstrLen = in->level + 1;
     453 CBC        1170 :     if (in->hasPrefix)
     454 ECB             :     {
     455 GIC         234 :         prefixText = DatumGetTextPP(in->prefixDatum);
     456 CBC         234 :         prefixSize = VARSIZE_ANY_EXHDR(prefixText);
     457             234 :         maxReconstrLen += prefixSize;
     458 ECB             :     }
     459                 : 
     460 GIC        1170 :     reconstrText = palloc(VARHDRSZ + maxReconstrLen);
     461 CBC        1170 :     SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen);
     462 ECB             : 
     463 GIC        1170 :     if (in->level)
     464 CBC        1080 :         memcpy(VARDATA(reconstrText),
     465            1080 :                VARDATA(reconstructedValue),
     466            1080 :                in->level);
     467            1170 :     if (prefixSize)
     468             234 :         memcpy(((char *) VARDATA(reconstrText)) + in->level,
     469             234 :                VARDATA_ANY(prefixText),
     470 ECB             :                prefixSize);
     471                 :     /* last byte of reconstrText will be filled in below */
     472                 : 
     473                 :     /*
     474                 :      * Scan the child nodes.  For each one, complete the reconstructed value
     475                 :      * and see if it's consistent with the query.  If so, emit an entry into
     476                 :      * the output arrays.
     477                 :      */
     478 GIC        1170 :     out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
     479 CBC        1170 :     out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes);
     480            1170 :     out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes);
     481            1170 :     out->nNodes = 0;
     482 ECB             : 
     483 GIC       11664 :     for (i = 0; i < in->nNodes; i++)
     484 ECB             :     {
     485 GIC       10494 :         int16       nodeChar = DatumGetInt16(in->nodeLabels[i]);
     486 ECB             :         int         thisLen;
     487 GIC       10494 :         bool        res = true;
     488 ECB             :         int         j;
     489                 : 
     490                 :         /* If nodeChar is a dummy value, don't include it in data */
     491 GIC       10494 :         if (nodeChar <= 0)
     492 CBC        2382 :             thisLen = maxReconstrLen - 1;
     493 ECB             :         else
     494                 :         {
     495 GIC        8112 :             ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar;
     496 CBC        8112 :             thisLen = maxReconstrLen;
     497 ECB             :         }
     498                 : 
     499 GIC       18942 :         for (j = 0; j < in->nkeys; j++)
     500 ECB             :         {
     501 GIC       10494 :             StrategyNumber strategy = in->scankeys[j].sk_strategy;
     502 ECB             :             text       *inText;
     503                 :             int         inSize;
     504                 :             int         r;
     505                 : 
     506                 :             /*
     507                 :              * If it's a collation-aware operator, but the collation is C, we
     508                 :              * can treat it as non-collation-aware.  With non-C collation we
     509                 :              * need to traverse whole tree :-( so there's no point in making
     510                 :              * any check here.  (Note also that our reconstructed value may
     511                 :              * well end with a partial multibyte character, so that applying
     512                 :              * any encoding-sensitive test to it would be risky anyhow.)
     513                 :              */
     514 GIC       10494 :             if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
     515 ECB             :             {
     516 GIC        7352 :                 if (collate_is_c)
     517 LBC           0 :                     strategy -= SPG_STRATEGY_ADDITION;
     518 EUB             :                 else
     519 GIC        7352 :                     continue;
     520 ECB             :             }
     521                 : 
     522 GIC        3142 :             inText = DatumGetTextPP(in->scankeys[j].sk_argument);
     523 CBC        3142 :             inSize = VARSIZE_ANY_EXHDR(inText);
     524 ECB             : 
     525 GIC        3142 :             r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText),
     526 CBC        3142 :                        Min(inSize, thisLen));
     527 ECB             : 
     528 GIC        3142 :             switch (strategy)
     529 ECB             :             {
     530 GIC         528 :                 case BTLessStrategyNumber:
     531 ECB             :                 case BTLessEqualStrategyNumber:
     532 GIC         528 :                     if (r > 0)
     533 CBC         300 :                         res = false;
     534             528 :                     break;
     535            1798 :                 case BTEqualStrategyNumber:
     536            1798 :                     if (r != 0 || inSize < thisLen)
     537            1050 :                         res = false;
     538            1798 :                     break;
     539             408 :                 case BTGreaterEqualStrategyNumber:
     540 ECB             :                 case BTGreaterStrategyNumber:
     541 GIC         408 :                     if (r < 0)
     542 CBC         312 :                         res = false;
     543             408 :                     break;
     544             408 :                 case RTPrefixStrategyNumber:
     545             408 :                     if (r != 0)
     546             384 :                         res = false;
     547             408 :                     break;
     548 LBC           0 :                 default:
     549 UBC           0 :                     elog(ERROR, "unrecognized strategy number: %d",
     550 EUB             :                          in->scankeys[j].sk_strategy);
     551                 :                     break;
     552                 :             }
     553                 : 
     554 GIC        3142 :             if (!res)
     555 CBC        2046 :                 break;          /* no need to consider remaining conditions */
     556 ECB             :         }
     557                 : 
     558 GIC       10494 :         if (res)
     559 ECB             :         {
     560 GIC        8448 :             out->nodeNumbers[out->nNodes] = i;
     561 CBC        8448 :             out->levelAdds[out->nNodes] = thisLen - in->level;
     562            8448 :             SET_VARSIZE(reconstrText, VARHDRSZ + thisLen);
     563           16896 :             out->reconstructedValues[out->nNodes] =
     564            8448 :                 datumCopy(PointerGetDatum(reconstrText), false, -1);
     565            8448 :             out->nNodes++;
     566 ECB             :         }
     567                 :     }
     568                 : 
     569 GIC        1170 :     PG_RETURN_VOID();
     570 ECB             : }
     571                 : 
     572                 : Datum
     573 GIC      166410 : spg_text_leaf_consistent(PG_FUNCTION_ARGS)
     574 ECB             : {
     575 GIC      166410 :     spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
     576 CBC      166410 :     spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
     577          166410 :     int         level = in->level;
     578 ECB             :     text       *leafValue,
     579 GIC      166410 :                *reconstrValue = NULL;
     580 ECB             :     char       *fullValue;
     581                 :     int         fullLen;
     582                 :     bool        res;
     583                 :     int         j;
     584                 : 
     585                 :     /* all tests are exact */
     586 GIC      166410 :     out->recheck = false;
     587 ECB             : 
     588 GIC      166410 :     leafValue = DatumGetTextPP(in->leafDatum);
     589 ECB             : 
     590                 :     /* As above, in->reconstructedValue isn't toasted or short. */
     591 GIC      166410 :     if (DatumGetPointer(in->reconstructedValue))
     592 CBC      166398 :         reconstrValue = (text *) DatumGetPointer(in->reconstructedValue);
     593 ECB             : 
     594 GIC      166410 :     Assert(reconstrValue == NULL ? level == 0 :
     595 ECB             :            VARSIZE_ANY_EXHDR(reconstrValue) == level);
     596                 : 
     597                 :     /* Reconstruct the full string represented by this leaf tuple */
     598 GIC      166410 :     fullLen = level + VARSIZE_ANY_EXHDR(leafValue);
     599 CBC      166410 :     if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0)
     600 ECB             :     {
     601 GIC       49752 :         fullValue = VARDATA(reconstrValue);
     602 CBC       49752 :         out->leafValue = PointerGetDatum(reconstrValue);
     603 ECB             :     }
     604                 :     else
     605                 :     {
     606 GIC      116658 :         text       *fullText = palloc(VARHDRSZ + fullLen);
     607 ECB             : 
     608 GIC      116658 :         SET_VARSIZE(fullText, VARHDRSZ + fullLen);
     609 CBC      116658 :         fullValue = VARDATA(fullText);
     610          116658 :         if (level)
     611          116646 :             memcpy(fullValue, VARDATA(reconstrValue), level);
     612          116658 :         if (VARSIZE_ANY_EXHDR(leafValue) > 0)
     613          116658 :             memcpy(fullValue + level, VARDATA_ANY(leafValue),
     614          116658 :                    VARSIZE_ANY_EXHDR(leafValue));
     615          116658 :         out->leafValue = PointerGetDatum(fullText);
     616 ECB             :     }
     617                 : 
     618                 :     /* Perform the required comparison(s) */
     619 GIC      166410 :     res = true;
     620 CBC      180183 :     for (j = 0; j < in->nkeys; j++)
     621 ECB             :     {
     622 GIC      166410 :         StrategyNumber strategy = in->scankeys[j].sk_strategy;
     623 CBC      166410 :         text       *query = DatumGetTextPP(in->scankeys[j].sk_argument);
     624          166410 :         int         queryLen = VARSIZE_ANY_EXHDR(query);
     625 ECB             :         int         r;
     626                 : 
     627 GIC      166410 :         if (strategy == RTPrefixStrategyNumber)
     628 ECB             :         {
     629                 :             /*
     630                 :              * if level >= length of query then reconstrValue must begin with
     631                 :              * query (prefix) string, so we don't need to check it again.
     632                 :              */
     633 GIC         384 :             res = (level >= queryLen) ||
     634 CBC         192 :                 DatumGetBool(DirectFunctionCall2Coll(text_starts_with,
     635 ECB             :                                                      PG_GET_COLLATION(),
     636                 :                                                      out->leafValue,
     637                 :                                                      PointerGetDatum(query)));
     638                 : 
     639 GIC         192 :             if (!res)           /* no need to consider remaining conditions */
     640 CBC         168 :                 break;
     641 ECB             : 
     642 GIC          24 :             continue;
     643 ECB             :         }
     644                 : 
     645 GIC      166218 :         if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy))
     646 ECB             :         {
     647                 :             /* Collation-aware comparison */
     648 GIC      150024 :             strategy -= SPG_STRATEGY_ADDITION;
     649 ECB             : 
     650                 :             /* If asserts enabled, verify encoding of reconstructed string */
     651 GIC      150024 :             Assert(pg_verifymbstr(fullValue, fullLen, false));
     652 ECB             : 
     653 GIC      150024 :             r = varstr_cmp(fullValue, fullLen,
     654 CBC      150024 :                            VARDATA_ANY(query), queryLen,
     655 ECB             :                            PG_GET_COLLATION());
     656                 :         }
     657                 :         else
     658                 :         {
     659                 :             /* Non-collation-aware comparison */
     660 GIC       16194 :             r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen));
     661 ECB             : 
     662 GIC       16194 :             if (r == 0)
     663 ECB             :             {
     664 GIC       12081 :                 if (queryLen > fullLen)
     665 CBC        6012 :                     r = -1;
     666            6069 :                 else if (queryLen < fullLen)
     667 LBC           0 :                     r = 1;
     668 EUB             :             }
     669                 :         }
     670                 : 
     671 GIC      166218 :         switch (strategy)
     672 ECB             :         {
     673 GIC       39144 :             case BTLessStrategyNumber:
     674 CBC       39144 :                 res = (r < 0);
     675           39144 :                 break;
     676           39144 :             case BTLessEqualStrategyNumber:
     677           39144 :                 res = (r <= 0);
     678           39144 :                 break;
     679           12150 :             case BTEqualStrategyNumber:
     680           12150 :                 res = (r == 0);
     681           12150 :                 break;
     682           37890 :             case BTGreaterEqualStrategyNumber:
     683           37890 :                 res = (r >= 0);
     684           37890 :                 break;
     685           37890 :             case BTGreaterStrategyNumber:
     686           37890 :                 res = (r > 0);
     687           37890 :                 break;
     688 LBC           0 :             default:
     689 UBC           0 :                 elog(ERROR, "unrecognized strategy number: %d",
     690 EUB             :                      in->scankeys[j].sk_strategy);
     691                 :                 res = false;
     692                 :                 break;
     693                 :         }
     694                 : 
     695 GIC      166218 :         if (!res)
     696 CBC      152469 :             break;              /* no need to consider remaining conditions */
     697 ECB             :     }
     698                 : 
     699 GIC      166410 :     PG_RETURN_BOOL(res);
     700 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a