LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - tsquery_cleanup.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 76.5 % 162 124 1 5 23 9 4 66 4 50 24 68 1 1
Current Date: 2023-04-08 15:15:32 Functions: 77.8 % 9 7 2 6 1 2 7
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * tsquery_cleanup.c
       4                 :  *   Cleanup query from NOT values and/or stopword
       5                 :  *   Utility functions to correct work.
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/utils/adt/tsquery_cleanup.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : 
      16                 : #include "postgres.h"
      17                 : 
      18                 : #include "miscadmin.h"
      19                 : #include "tsearch/ts_utils.h"
      20                 : #include "varatt.h"
      21                 : 
      22                 : typedef struct NODE
      23                 : {
      24                 :     struct NODE *left;
      25                 :     struct NODE *right;
      26                 :     QueryItem  *valnode;
      27                 : } NODE;
      28                 : 
      29                 : /*
      30                 :  * make query tree from plain view of query
      31                 :  */
      32                 : static NODE *
      33 GIC        1530 : maketree(QueryItem *in)
      34 ECB             : {
      35 GIC        1530 :     NODE       *node = (NODE *) palloc(sizeof(NODE));
      36 ECB             : 
      37                 :     /* since this function recurses, it could be driven to stack overflow. */
      38 GIC        1530 :     check_stack_depth();
      39 ECB             : 
      40 GIC        1530 :     node->valnode = in;
      41 CBC        1530 :     node->right = node->left = NULL;
      42            1530 :     if (in->type == QI_OPR)
      43 ECB             :     {
      44 GIC         675 :         node->right = maketree(in + 1);
      45 CBC         675 :         if (in->qoperator.oper != OP_NOT)
      46             633 :             node->left = maketree(in + in->qoperator.left);
      47 ECB             :     }
      48 GIC        1530 :     return node;
      49 ECB             : }
      50                 : 
      51                 : /*
      52                 :  * Internal state for plaintree and plainnode
      53                 :  */
      54                 : typedef struct
      55                 : {
      56                 :     QueryItem  *ptr;
      57                 :     int         len;            /* allocated size of ptr */
      58                 :     int         cur;            /* number of elements in ptr */
      59                 : } PLAINTREE;
      60                 : 
      61                 : static void
      62 GIC         849 : plainnode(PLAINTREE *state, NODE *node)
      63 ECB             : {
      64                 :     /* since this function recurses, it could be driven to stack overflow. */
      65 GIC         849 :     check_stack_depth();
      66 ECB             : 
      67 GIC         849 :     if (state->cur == state->len)
      68 ECB             :     {
      69 UIC           0 :         state->len *= 2;
      70 UNC           0 :         state->ptr = (QueryItem *) repalloc(state->ptr, state->len * sizeof(QueryItem));
      71 EUB             :     }
      72 GNC         849 :     memcpy(&(state->ptr[state->cur]), node->valnode, sizeof(QueryItem));
      73 CBC         849 :     if (node->valnode->type == QI_VAL)
      74             513 :         state->cur++;
      75             336 :     else if (node->valnode->qoperator.oper == OP_NOT)
      76 ECB             :     {
      77 GIC          33 :         state->ptr[state->cur].qoperator.left = 1;
      78 CBC          33 :         state->cur++;
      79              33 :         plainnode(state, node->right);
      80 ECB             :     }
      81                 :     else
      82                 :     {
      83 GIC         303 :         int         cur = state->cur;
      84 ECB             : 
      85 GIC         303 :         state->cur++;
      86 CBC         303 :         plainnode(state, node->right);
      87             303 :         state->ptr[cur].qoperator.left = state->cur - cur;
      88             303 :         plainnode(state, node->left);
      89 ECB             :     }
      90 GIC         849 :     pfree(node);
      91 CBC         849 : }
      92 ECB             : 
      93                 : /*
      94                 :  * make plain view of tree from a NODE-tree representation
      95                 :  */
      96                 : static QueryItem *
      97 GIC         210 : plaintree(NODE *root, int *len)
      98 ECB             : {
      99                 :     PLAINTREE   pl;
     100                 : 
     101 GIC         210 :     pl.cur = 0;
     102 CBC         210 :     pl.len = 16;
     103             210 :     if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
     104 ECB             :     {
     105 GIC         210 :         pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
     106 CBC         210 :         plainnode(&pl, root);
     107 ECB             :     }
     108                 :     else
     109 UIC           0 :         pl.ptr = NULL;
     110 GBC         210 :     *len = pl.cur;
     111 CBC         210 :     return pl.ptr;
     112 ECB             : }
     113                 : 
     114                 : static void
     115 GIC          21 : freetree(NODE *node)
     116 ECB             : {
     117                 :     /* since this function recurses, it could be driven to stack overflow. */
     118 GIC          21 :     check_stack_depth();
     119 ECB             : 
     120 GIC          21 :     if (!node)
     121 LBC           0 :         return;
     122 GBC          21 :     if (node->left)
     123 LBC           0 :         freetree(node->left);
     124 GBC          21 :     if (node->right)
     125 LBC           0 :         freetree(node->right);
     126 GBC          21 :     pfree(node);
     127 ECB             : }
     128                 : 
     129                 : /*
     130                 :  * clean tree for ! operator.
     131                 :  * It's useful for debug, but in
     132                 :  * other case, such view is used with search in index.
     133                 :  * Operator ! always return TRUE
     134                 :  */
     135                 : static NODE *
     136 UIC           0 : clean_NOT_intree(NODE *node)
     137 EUB             : {
     138                 :     /* since this function recurses, it could be driven to stack overflow. */
     139 UIC           0 :     check_stack_depth();
     140 EUB             : 
     141 UIC           0 :     if (node->valnode->type == QI_VAL)
     142 UBC           0 :         return node;
     143 EUB             : 
     144 UIC           0 :     if (node->valnode->qoperator.oper == OP_NOT)
     145 EUB             :     {
     146 UIC           0 :         freetree(node);
     147 UBC           0 :         return NULL;
     148 EUB             :     }
     149                 : 
     150                 :     /* operator & or | */
     151 UIC           0 :     if (node->valnode->qoperator.oper == OP_OR)
     152 EUB             :     {
     153 UIC           0 :         if ((node->left = clean_NOT_intree(node->left)) == NULL ||
     154 UBC           0 :             (node->right = clean_NOT_intree(node->right)) == NULL)
     155 EUB             :         {
     156 UIC           0 :             freetree(node);
     157 UBC           0 :             return NULL;
     158 EUB             :         }
     159                 :     }
     160                 :     else
     161                 :     {
     162 UIC           0 :         NODE       *res = node;
     163 EUB             : 
     164 UIC           0 :         Assert(node->valnode->qoperator.oper == OP_AND ||
     165 EUB             :                node->valnode->qoperator.oper == OP_PHRASE);
     166                 : 
     167 UIC           0 :         node->left = clean_NOT_intree(node->left);
     168 UBC           0 :         node->right = clean_NOT_intree(node->right);
     169               0 :         if (node->left == NULL && node->right == NULL)
     170 EUB             :         {
     171 UIC           0 :             pfree(node);
     172 UBC           0 :             res = NULL;
     173 EUB             :         }
     174 UIC           0 :         else if (node->left == NULL)
     175 EUB             :         {
     176 UIC           0 :             res = node->right;
     177 UBC           0 :             pfree(node);
     178 EUB             :         }
     179 UIC           0 :         else if (node->right == NULL)
     180 EUB             :         {
     181 UIC           0 :             res = node->left;
     182 UBC           0 :             pfree(node);
     183 EUB             :         }
     184 UIC           0 :         return res;
     185 EUB             :     }
     186 UIC           0 :     return node;
     187 EUB             : }
     188                 : 
     189                 : QueryItem *
     190 UIC           0 : clean_NOT(QueryItem *ptr, int *len)
     191 EUB             : {
     192 UIC           0 :     NODE       *root = maketree(ptr);
     193 EUB             : 
     194 UIC           0 :     return plaintree(clean_NOT_intree(root), len);
     195 EUB             : }
     196                 : 
     197                 : 
     198                 : /*
     199                 :  * Remove QI_VALSTOP (stopword) nodes from query tree.
     200                 :  *
     201                 :  * Returns NULL if the query degenerates to nothing.  Input must not be NULL.
     202                 :  *
     203                 :  * When we remove a phrase operator due to removing one or both of its
     204                 :  * arguments, we might need to adjust the distance of a parent phrase
     205                 :  * operator.  For example, 'a' is a stopword, so:
     206                 :  *      (b <-> a) <-> c  should become  b <2> c
     207                 :  *      b <-> (a <-> c)  should become  b <2> c
     208                 :  *      (b <-> (a <-> a)) <-> c  should become    b <3> c
     209                 :  *      b <-> ((a <-> a) <-> c)  should become    b <3> c
     210                 :  * To handle that, we define two output parameters:
     211                 :  *      ladd: amount to add to a phrase distance to the left of this node
     212                 :  *      radd: amount to add to a phrase distance to the right of this node
     213                 :  * We need two outputs because we could need to bubble up adjustments to two
     214                 :  * different parent phrase operators.  Consider
     215                 :  *      w <-> (((a <-> x) <2> (y <3> a)) <-> z)
     216                 :  * After we've removed the two a's and are considering the <2> node (which is
     217                 :  * now just x <2> y), we have an ladd distance of 1 that needs to propagate
     218                 :  * up to the topmost (leftmost) <->, and an radd distance of 3 that needs to
     219                 :  * propagate to the rightmost <->, so that we'll end up with
     220                 :  *      w <2> ((x <2> y) <4> z)
     221                 :  * Near the bottom of the tree, we may have subtrees consisting only of
     222                 :  * stopwords.  The distances of any phrase operators within such a subtree are
     223                 :  * summed and propagated to both ladd and radd, since we don't know which side
     224                 :  * of the lowest surviving phrase operator we are in.  The rule is that any
     225                 :  * subtree that degenerates to NULL must return equal values of ladd and radd,
     226                 :  * and the parent node dealing with it should incorporate only one of those.
     227                 :  *
     228                 :  * Currently, we only implement this adjustment for adjacent phrase operators.
     229                 :  * Thus for example 'x <-> ((a <-> y) | z)' will become 'x <-> (y | z)', which
     230                 :  * isn't ideal, but there is no way to represent the really desired semantics
     231                 :  * without some redesign of the tsquery structure.  Certainly it would not be
     232                 :  * any better to convert that to 'x <2> (y | z)'.  Since this is such a weird
     233                 :  * corner case, let it go for now.  But we can fix it in cases where the
     234                 :  * intervening non-phrase operator also gets removed, for example
     235                 :  * '((x <-> a) | a) <-> y' will become 'x <2> y'.
     236                 :  */
     237                 : static NODE *
     238 GIC        1530 : clean_stopword_intree(NODE *node, int *ladd, int *radd)
     239 ECB             : {
     240                 :     /* since this function recurses, it could be driven to stack overflow. */
     241 GIC        1530 :     check_stack_depth();
     242 ECB             : 
     243                 :     /* default output parameters indicate no change in parent distance */
     244 GIC        1530 :     *ladd = *radd = 0;
     245 ECB             : 
     246 GIC        1530 :     if (node->valnode->type == QI_VAL)
     247 CBC         513 :         return node;
     248            1017 :     else if (node->valnode->type == QI_VALSTOP)
     249 ECB             :     {
     250 GIC         342 :         pfree(node);
     251 CBC         342 :         return NULL;
     252 ECB             :     }
     253                 : 
     254 GIC         675 :     Assert(node->valnode->type == QI_OPR);
     255 ECB             : 
     256 GIC         675 :     if (node->valnode->qoperator.oper == OP_NOT)
     257 ECB             :     {
     258                 :         /* NOT doesn't change pattern width, so just report child distances */
     259 GIC          42 :         node->right = clean_stopword_intree(node->right, ladd, radd);
     260 CBC          42 :         if (!node->right)
     261 ECB             :         {
     262 GIC           9 :             freetree(node);
     263 CBC           9 :             return NULL;
     264 ECB             :         }
     265                 :     }
     266                 :     else
     267                 :     {
     268 GIC         633 :         NODE       *res = node;
     269 ECB             :         bool        isphrase;
     270                 :         int         ndistance,
     271                 :                     lladd,
     272                 :                     lradd,
     273                 :                     rladd,
     274                 :                     rradd;
     275                 : 
     276                 :         /* First, recurse */
     277 GIC         633 :         node->left = clean_stopword_intree(node->left, &lladd, &lradd);
     278 CBC         633 :         node->right = clean_stopword_intree(node->right, &rladd, &rradd);
     279 ECB             : 
     280                 :         /* Check if current node is OP_PHRASE, get its distance */
     281 GIC         633 :         isphrase = (node->valnode->qoperator.oper == OP_PHRASE);
     282 CBC         633 :         ndistance = isphrase ? node->valnode->qoperator.distance : 0;
     283 ECB             : 
     284 GIC         633 :         if (node->left == NULL && node->right == NULL)
     285 ECB             :         {
     286                 :             /*
     287                 :              * When we collapse out a phrase node entirely, propagate its own
     288                 :              * distance into both *ladd and *radd; it is the responsibility of
     289                 :              * the parent node to count it only once.  Also, for a phrase
     290                 :              * node, distances coming from children are summed and propagated
     291                 :              * up to parent (we assume lladd == lradd and rladd == rradd, else
     292                 :              * rule was broken at a lower level).  But if this isn't a phrase
     293                 :              * node, take the larger of the two child distances; that
     294                 :              * corresponds to what TS_execute will do in non-stopword cases.
     295                 :              */
     296 GIC          12 :             if (isphrase)
     297 LBC           0 :                 *ladd = *radd = lladd + ndistance + rladd;
     298 EUB             :             else
     299 GIC          12 :                 *ladd = *radd = Max(lladd, rladd);
     300 CBC          12 :             freetree(node);
     301              12 :             return NULL;
     302 ECB             :         }
     303 GIC         621 :         else if (node->left == NULL)
     304 ECB             :         {
     305                 :             /* Removing this operator and left subnode */
     306                 :             /* lladd and lradd are equal/redundant, don't count both */
     307 GIC         126 :             if (isphrase)
     308 ECB             :             {
     309                 :                 /* operator's own distance must propagate to left */
     310 GIC          81 :                 *ladd = lladd + ndistance + rladd;
     311 CBC          81 :                 *radd = rradd;
     312 ECB             :             }
     313                 :             else
     314                 :             {
     315                 :                 /* at non-phrase op, just forget the left subnode entirely */
     316 GIC          45 :                 *ladd = rladd;
     317 CBC          45 :                 *radd = rradd;
     318 ECB             :             }
     319 GIC         126 :             res = node->right;
     320 CBC         126 :             pfree(node);
     321 ECB             :         }
     322 GIC         495 :         else if (node->right == NULL)
     323 ECB             :         {
     324                 :             /* Removing this operator and right subnode */
     325                 :             /* rladd and rradd are equal/redundant, don't count both */
     326 GIC         192 :             if (isphrase)
     327 ECB             :             {
     328                 :                 /* operator's own distance must propagate to right */
     329 GIC         108 :                 *ladd = lladd;
     330 CBC         108 :                 *radd = lradd + ndistance + rradd;
     331 ECB             :             }
     332                 :             else
     333                 :             {
     334                 :                 /* at non-phrase op, just forget the right subnode entirely */
     335 GIC          84 :                 *ladd = lladd;
     336 CBC          84 :                 *radd = lradd;
     337 ECB             :             }
     338 GIC         192 :             res = node->left;
     339 CBC         192 :             pfree(node);
     340 ECB             :         }
     341 GIC         303 :         else if (isphrase)
     342 ECB             :         {
     343                 :             /* Absorb appropriate corrections at this level */
     344 GIC         171 :             node->valnode->qoperator.distance += lradd + rladd;
     345 ECB             :             /* Propagate up any unaccounted-for corrections */
     346 GIC         171 :             *ladd = lladd;
     347 CBC         171 :             *radd = rradd;
     348 ECB             :         }
     349                 :         else
     350                 :         {
     351                 :             /* We're keeping a non-phrase operator, so ladd/radd remain 0 */
     352                 :         }
     353                 : 
     354 GIC         621 :         return res;
     355 ECB             :     }
     356 GIC          33 :     return node;
     357 ECB             : }
     358                 : 
     359                 : /*
     360                 :  * Number of elements in query tree
     361                 :  */
     362                 : static int32
     363 GIC         849 : calcstrlen(NODE *node)
     364 ECB             : {
     365 GIC         849 :     int32       size = 0;
     366 ECB             : 
     367 GIC         849 :     if (node->valnode->type == QI_VAL)
     368 ECB             :     {
     369 GIC         513 :         size = node->valnode->qoperand.length + 1;
     370 ECB             :     }
     371                 :     else
     372                 :     {
     373 GIC         336 :         Assert(node->valnode->type == QI_OPR);
     374 ECB             : 
     375 GIC         336 :         size = calcstrlen(node->right);
     376 CBC         336 :         if (node->valnode->qoperator.oper != OP_NOT)
     377             303 :             size += calcstrlen(node->left);
     378 ECB             :     }
     379                 : 
     380 GIC         849 :     return size;
     381 ECB             : }
     382                 : 
     383                 : /*
     384                 :  * Remove QI_VALSTOP (stopword) nodes from TSQuery.
     385                 :  */
     386                 : TSQuery
     387 GNC         222 : cleanup_tsquery_stopwords(TSQuery in, bool noisy)
     388 ECB             : {
     389                 :     int32       len,
     390                 :                 lenstr,
     391                 :                 commonlen,
     392                 :                 i;
     393                 :     NODE       *root;
     394                 :     int         ladd,
     395                 :                 radd;
     396                 :     TSQuery     out;
     397                 :     QueryItem  *items;
     398                 :     char       *operands;
     399                 : 
     400 GIC         222 :     if (in->size == 0)
     401 LBC           0 :         return in;
     402 EUB             : 
     403                 :     /* eliminate stop words */
     404 GIC         222 :     root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
     405 CBC         222 :     if (root == NULL)
     406 ECB             :     {
     407 GNC          12 :         if (noisy)
     408              12 :             ereport(NOTICE,
     409                 :                     (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
     410 CBC          12 :         out = palloc(HDRSIZETQ);
     411 GIC          12 :         out->size = 0;
     412 CBC          12 :         SET_VARSIZE(out, HDRSIZETQ);
     413              12 :         return out;
     414 ECB             :     }
     415                 : 
     416                 :     /*
     417                 :      * Build TSQuery from plain view
     418                 :      */
     419                 : 
     420 GIC         210 :     lenstr = calcstrlen(root);
     421             210 :     items = plaintree(root, &len);
     422 CBC         210 :     commonlen = COMPUTESIZE(len, lenstr);
     423 ECB             : 
     424 CBC         210 :     out = palloc(commonlen);
     425 GIC         210 :     SET_VARSIZE(out, commonlen);
     426 CBC         210 :     out->size = len;
     427 ECB             : 
     428 CBC         210 :     memcpy(GETQUERY(out), items, len * sizeof(QueryItem));
     429                 : 
     430             210 :     items = GETQUERY(out);
     431 GIC         210 :     operands = GETOPERAND(out);
     432 CBC        1059 :     for (i = 0; i < out->size; i++)
     433 ECB             :     {
     434 CBC         849 :         QueryOperand *op = (QueryOperand *) &items[i];
     435                 : 
     436             849 :         if (op->type != QI_VAL)
     437 GIC         336 :             continue;
     438 ECB             : 
     439 CBC         513 :         memcpy(operands, GETOPERAND(in) + op->distance, op->length);
     440 GIC         513 :         operands[op->length] = '\0';
     441 CBC         513 :         op->distance = operands - GETOPERAND(out);
     442             513 :         operands += op->length + 1;
     443 ECB             :     }
     444                 : 
     445 GIC         210 :     return out;
     446                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a