LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - tsvector_op.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 86.5 % 1182 1022 9 46 60 45 29 503 62 428 86 543 5
Current Date: 2023-04-08 15:15:32 Functions: 82.7 % 52 43 9 41 2 9 43
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * tsvector_op.c
       4                 :  *    operations over tsvector
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  *
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *    src/backend/utils/adt/tsvector_op.c
      11                 :  *
      12                 :  *-------------------------------------------------------------------------
      13                 :  */
      14                 : #include "postgres.h"
      15                 : 
      16                 : #include <limits.h>
      17                 : 
      18                 : #include "access/htup_details.h"
      19                 : #include "catalog/namespace.h"
      20                 : #include "catalog/pg_type.h"
      21                 : #include "commands/trigger.h"
      22                 : #include "executor/spi.h"
      23                 : #include "funcapi.h"
      24                 : #include "lib/qunique.h"
      25                 : #include "mb/pg_wchar.h"
      26                 : #include "miscadmin.h"
      27                 : #include "parser/parse_coerce.h"
      28                 : #include "tsearch/ts_utils.h"
      29                 : #include "utils/array.h"
      30                 : #include "utils/builtins.h"
      31                 : #include "utils/lsyscache.h"
      32                 : #include "utils/regproc.h"
      33                 : #include "utils/rel.h"
      34                 : 
      35                 : 
      36                 : typedef struct  /* CHKVAL: a bundle of two WordEntry pointers and two char pointers */
      37                 : {
      38                 :     WordEntry  *arrb;  /* presumably the first entry of a WordEntry array -- TODO confirm against users */
      39                 :     WordEntry  *arre;  /* presumably the end of that array -- TODO confirm */
      40                 :     char       *values;  /* presumably the base of the lexeme text the entries refer to -- TODO confirm */
      41                 :     char       *operand;  /* presumably the base of the query operand text -- TODO confirm */
      42                 : } CHKVAL;
      43                 : 
      44                 : 
      45                 : typedef struct StatEntry  /* node of a binary tree (left/right links) holding one lexeme and two counters */
      46                 : {
      47                 :     uint32      ndoc;           /* zero indicates that we were already here
      48                 :                                  * while walking through the tree */
      49                 :     uint32      nentry;  /* second counter; presumably the number of occurrences -- TODO confirm */
      50                 :     struct StatEntry *left;  /* child link */
      51                 :     struct StatEntry *right;  /* child link */
      52                 :     uint32      lenlexeme;  /* presumably the byte length of lexeme[] -- TODO confirm */
      53                 :     char        lexeme[FLEXIBLE_ARRAY_MEMBER];  /* lexeme bytes stored inline after the fixed fields */
      54                 : } StatEntry;
      55                 : 
      56                 : #define STATENTRYHDRSZ  (offsetof(StatEntry, lexeme))  /* size of the fixed part that precedes lexeme[] */
      57                 : 
      58                 : typedef struct  /* TSVectorStat: a StatEntry tree pointer plus a stack of StatEntry pointers and two limits */
      59                 : {
      60                 :     int32       weight;  /* presumably a weight filter -- TODO confirm against users */
      61                 : 
      62                 :     uint32      maxdepth;  /* presumably the deepest tree level reached -- TODO confirm */
      63                 : 
      64                 :     StatEntry **stack;  /* array of StatEntry pointers */
      65                 :     uint32      stackpos;  /* presumably the current index into stack -- TODO confirm */
      66                 : 
      67                 :     StatEntry  *root;  /* presumably the root of the StatEntry tree -- TODO confirm */
      68                 : } TSVectorStat;
      69                 : 
      70                 : 
      71                 : static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
      72                 :                                          uint32 flags,
      73                 :                                          TSExecuteCallback chkcond);  /* forward declaration of a file-local function */
      74                 : static bool TS_execute_locations_recurse(QueryItem *curitem,
      75                 :                                          void *arg,
      76                 :                                          TSExecuteCallback chkcond,
      77                 :                                          List **locations);  /* forward declaration of a file-local function */
      78                 : static int  tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);  /* index of lexeme in tsv's entry array, or -1 */
      79                 : static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);  /* forward declaration of a file-local function */
      80                 : 
      81                 : 
      82                 : /*
      83                 :  * Compare two tsvectors; result is negative, zero or positive.  Order: total varlena size, number of entries, then per entry: haspos, word (with len), number of positions, and for all positions (pos, weight)
      84                 :  */
      85                 : static int
      86 GIC           1 : silly_cmp_tsvector(const TSVector a, const TSVector b)
      87                 : {
      88               1 :     if (VARSIZE(a) < VARSIZE(b))  /* the smaller total size sorts first */
      89 UIC           0 :         return -1;
      90 CBC           1 :     else if (VARSIZE(a) > VARSIZE(b))
      91 UIC           0 :         return 1;
      92 CBC           1 :     else if (a->size < b->size)  /* equal sizes: the one with fewer entries sorts first */
      93 UBC           0 :         return -1;
      94 CBC           1 :     else if (a->size > b->size)
      95 UBC           0 :         return 1;
      96 ECB             :     else
      97 EUB             :     {
      98 CBC           1 :         WordEntry  *aptr = ARRPTR(a);
      99 GBC           1 :         WordEntry  *bptr = ARRPTR(b);
     100 GIC           1 :         int         i = 0;
     101                 :         int         res;
     102 ECB             : 
     103                 : 
     104 CBC           4 :         for (i = 0; i < a->size; i++)  /* both vectors have the same entry count here; walk them in parallel */
     105                 :         {
     106 GIC           3 :             if (aptr->haspos != bptr->haspos)
     107                 :             {
     108 LBC           0 :                 return (aptr->haspos > bptr->haspos) ? -1 : 1;  /* the entry that has positions sorts first */
     109                 :             }
     110 CBC           3 :             else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
     111                 :             {
     112 UBC           0 :                 return res;  /* lexeme text differs: pass the string comparison result through */
     113                 :             }
     114 CBC           3 :             else if (aptr->haspos)
     115                 :             {
     116 UBC           0 :                 WordEntryPos *ap = POSDATAPTR(a, aptr);
     117 UIC           0 :                 WordEntryPos *bp = POSDATAPTR(b, bptr);
     118 ECB             :                 int         j;
     119                 : 
     120 UBC           0 :                 if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
     121               0 :                     return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;  /* more positions sorts first */
     122                 : 
     123 UIC           0 :                 for (j = 0; j < POSDATALEN(a, aptr); j++)
     124 EUB             :                 {
     125 UBC           0 :                     if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
     126                 :                     {
     127               0 :                         return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;  /* the larger position sorts first */
     128                 :                     }
     129               0 :                     else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
     130                 :                     {
     131               0 :                         return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;  /* the larger weight code sorts first */
     132                 :                     }
     133               0 :                     ap++, bp++;
     134                 :                 }
     135 EUB             :             }
     136                 : 
     137 GBC           3 :             aptr++;
     138 GIC           3 :             bptr++;
     139                 :         }
     140                 :     }
     141 ECB             : 
     142 CBC           1 :     return 0;  /* no difference found in any compared field */
     143                 : }
     144                 : 
     145                 : #define TSVECTORCMPFUNC( type, action, ret )            \
     146 ECB             : Datum                                                   \
     147                 : tsvector_##type(PG_FUNCTION_ARGS)                       \
     148                 : {                                                       \
     149                 :     TSVector    a = PG_GETARG_TSVECTOR(0);              \
     150                 :     TSVector    b = PG_GETARG_TSVECTOR(1);              \
     151                 :     int         res = silly_cmp_tsvector(a, b);         \
     152                 :     PG_FREE_IF_COPY(a,0);                               \
     153                 :     PG_FREE_IF_COPY(b,1);                               \
     154                 :     PG_RETURN_##ret( res action 0 );                    \
     155                 : }   \
     156                 : /* keep compiler quiet - no extra ; */                  \
     157                 : extern int no_such_variable  /* dummy declaration: it is what the ';' written after each TSVECTORCMPFUNC(...) terminates */
     158                 : 
     159 UIC           0 : TSVECTORCMPFUNC(lt, <, BOOL);  /* each use defines tsvector_<type>(), returning silly_cmp_tsvector(arg 0, arg 1) <action> 0; here tsvector_lt */
     160               0 : TSVECTORCMPFUNC(le, <=, BOOL);  /* tsvector_le */
     161 GIC           1 : TSVECTORCMPFUNC(eq, ==, BOOL);  /* tsvector_eq */
     162 UIC           0 : TSVECTORCMPFUNC(ge, >=, BOOL);  /* tsvector_ge */
     163 UBC           0 : TSVECTORCMPFUNC(gt, >, BOOL);  /* tsvector_gt */
     164               0 : TSVECTORCMPFUNC(ne, !=, BOOL);  /* tsvector_ne */
     165 LBC           0 : TSVECTORCMPFUNC(cmp, +, INT32);  /* tsvector_cmp: "res + 0", i.e. the comparison result itself as an int32 */
     166 EUB             : 
     167                 : Datum
     168 GBC          45 : tsvector_strip(PG_FUNCTION_ARGS)  /* build a new tsvector holding the input's lexemes with haspos cleared, i.e. no position data */
     169 EUB             : {
     170 GIC          45 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     171                 :     TSVector    out;
     172 ECB             :     int         i,
     173 GIC          45 :                 len = 0;
     174 CBC          45 :     WordEntry  *arrin = ARRPTR(in),
     175                 :                *arrout;
     176                 :     char       *cur;
     177 ECB             : 
     178 CBC         159 :     for (i = 0; i < in->size; i++)  /* first pass: total bytes of lexeme text */
     179 GIC         114 :         len += arrin[i].len;
     180                 : 
     181              45 :     len = CALCDATASIZE(in->size, len);  /* len now becomes the allocation size for in->size entries plus that text */
     182 CBC          45 :     out = (TSVector) palloc0(len);
     183              45 :     SET_VARSIZE(out, len);
     184 GIC          45 :     out->size = in->size;
     185 CBC          45 :     arrout = ARRPTR(out);
     186              45 :     cur = STRPTR(out);
     187             159 :     for (i = 0; i < in->size; i++)  /* second pass: pack the lexemes back to back in the output */
     188 ECB             :     {
     189 CBC         114 :         memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
     190             114 :         arrout[i].haspos = 0;  /* the output entry carries no positions */
     191             114 :         arrout[i].len = arrin[i].len;
     192 GIC         114 :         arrout[i].pos = cur - STRPTR(out);  /* offset of this lexeme from the start of out's string area */
     193 CBC         114 :         cur += arrout[i].len;
     194 ECB             :     }
     195                 : 
     196 CBC          45 :     PG_FREE_IF_COPY(in, 0);
     197              45 :     PG_RETURN_POINTER(out);
     198                 : }
     199                 : 
     200 ECB             : Datum
     201 CBC           5 : tsvector_length(PG_FUNCTION_ARGS)  /* return the tsvector's size field (its number of entries) as an int32 */
     202                 : {
     203 GIC           5 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     204               5 :     int32       ret = in->size;  /* read the value before the argument copy may be freed below */
     205 ECB             : 
     206 GIC           5 :     PG_FREE_IF_COPY(in, 0);
     207 CBC           5 :     PG_RETURN_INT32(ret);
     208 ECB             : }
     209                 : 
     210                 : Datum
     211 CBC           6 : tsvector_setweight(PG_FUNCTION_ARGS)  /* return a copy of the tsvector (arg 0) with every position's weight set from the weight letter (arg 1) */
     212                 : {
     213 GIC           6 :     TSVector    in = PG_GETARG_TSVECTOR(0);
     214               6 :     char        cw = PG_GETARG_CHAR(1);
     215 ECB             :     TSVector    out;
     216                 :     int         i,
     217                 :                 j;
     218                 :     WordEntry  *entry;
     219                 :     WordEntryPos *p;
     220 GIC           6 :     int         w = 0;
     221                 : 
     222               6 :     switch (cw)  /* map the weight letter, either case, to its numeric code: A=3, B=2, C=1, D=0 */
     223                 :     {
     224 LBC           0 :         case 'A':
     225                 :         case 'a':
     226               0 :             w = 3;
     227 UIC           0 :             break;
     228 UBC           0 :         case 'B':
     229                 :         case 'b':
     230               0 :             w = 2;
     231               0 :             break;
     232 GBC           6 :         case 'C':
     233                 :         case 'c':
     234               6 :             w = 1;
     235               6 :             break;
     236 LBC           0 :         case 'D':
     237                 :         case 'd':
     238               0 :             w = 0;
     239               0 :             break;
     240 UBC           0 :         default:
     241                 :             /* internal error */
     242               0 :             elog(ERROR, "unrecognized weight: %d", cw);  /* %d: the message shows the character's numeric value */
     243 EUB             :     }
     244                 : 
     245 GIC           6 :     out = (TSVector) palloc(VARSIZE(in));  /* the result is a same-sized copy of the input */
     246 GBC           6 :     memcpy(out, in, VARSIZE(in));
     247 GIC           6 :     entry = ARRPTR(out);
     248               6 :     i = out->size;
     249 CBC          30 :     while (i--)  /* visit every entry of the copy */
     250 ECB             :     {
     251 CBC          24 :         if ((j = POSDATALEN(out, entry)) != 0)  /* entries with no positions are left as they are */
     252 ECB             :         {
     253 CBC          24 :             p = POSDATAPTR(out, entry);
     254 GIC          84 :             while (j--)
     255 ECB             :             {
     256 GIC          60 :                 WEP_SETWEIGHT(*p, w);
     257 CBC          60 :                 p++;
     258 ECB             :             }
     259                 :         }
     260 CBC          24 :         entry++;
     261 ECB             :     }
     262                 : 
     263 GIC           6 :     PG_FREE_IF_COPY(in, 0);
     264 CBC           6 :     PG_RETURN_POINTER(out);
     265                 : }
     266                 : 
     267 ECB             : /*
     268                 :  * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
     269                 :  *
     270                 :  * Assign weight char_weight to the positions of those elements of tsin that are listed in lexemes; the result is a modified copy of tsin.
     271                 :  */
     272                 : Datum
     273 GIC          12 : tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
     274                 : {
     275              12 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     276              12 :     char        char_weight = PG_GETARG_CHAR(1);
     277 CBC          12 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
     278                 : 
     279 ECB             :     TSVector    tsout;
     280                 :     int         i,
     281                 :                 j,
     282                 :                 nlexemes,
     283                 :                 weight;
     284                 :     WordEntry  *entry;
     285                 :     Datum      *dlexemes;
     286                 :     bool       *nulls;
     287                 : 
     288 GIC          12 :     switch (char_weight)  /* map the weight letter, either case, to its numeric code: A=3, B=2, C=1, D=0 */
     289                 :     {
     290 UIC           0 :         case 'A':
     291                 :         case 'a':
     292 LBC           0 :             weight = 3;
     293 UIC           0 :             break;
     294 UBC           0 :         case 'B':
     295                 :         case 'b':
     296               0 :             weight = 2;
     297               0 :             break;
     298 GBC          12 :         case 'C':
     299                 :         case 'c':
     300              12 :             weight = 1;
     301              12 :             break;
     302 LBC           0 :         case 'D':
     303                 :         case 'd':
     304               0 :             weight = 0;
     305               0 :             break;
     306 UBC           0 :         default:
     307                 :             /* internal error */
     308               0 :             elog(ERROR, "unrecognized weight: %c", char_weight);
     309 EUB             :     }
     310                 : 
     311 GIC          12 :     tsout = (TSVector) palloc(VARSIZE(tsin));  /* work on a same-sized copy of the input */
     312 GBC          12 :     memcpy(tsout, tsin, VARSIZE(tsin));
     313 GIC          12 :     entry = ARRPTR(tsout);
     314                 : 
     315 GNC          12 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlexemes);
     316 ECB             : 
     317                 :     /*
     318                 :      * Assuming that lexemes array is significantly shorter than tsvector we
     319                 :      * can iterate through lexemes performing binary search of each lexeme
     320                 :      * from lexemes in tsvector.
     321                 :      */
     322 GIC          36 :     for (i = 0; i < nlexemes; i++)
     323                 :     {
     324                 :         char       *lex;
     325 ECB             :         int         lex_len,
     326                 :                     lex_pos;
     327                 : 
     328                 :         /* Ignore null array elements, they surely don't match */
     329 GIC          24 :         if (nulls[i])
     330               3 :             continue;
     331                 : 
     332 CBC          21 :         lex = VARDATA(dlexemes[i]);  /* NOTE(review): assumes a full-size varlena header on each element; compare_text_lexemes uses the _ANY accessors -- confirm */
     333              21 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     334 GIC          21 :         lex_pos = tsvector_bsearch(tsout, lex, lex_len);  /* -1 when the lexeme is not in the vector */
     335 ECB             : 
     336 CBC          21 :         if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)  /* found, and it has at least one position */
     337 ECB             :         {
     338 GIC          12 :             WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
     339 ECB             : 
     340 GIC          39 :             while (j--)
     341 ECB             :             {
     342 GIC          27 :                 WEP_SETWEIGHT(*p, weight);
     343 CBC          27 :                 p++;
     344                 :             }
     345 ECB             :         }
     346                 :     }
     347                 : 
     348 GIC          12 :     PG_FREE_IF_COPY(tsin, 0);
     349              12 :     PG_FREE_IF_COPY(lexemes, 2);
     350                 : 
     351 CBC          12 :     PG_RETURN_POINTER(tsout);
     352 ECB             : }
     353                 : 
     354                 : #define compareEntry(pa, a, pb, b) \
     355                 :     tsCompareString((pa) + (a)->pos, (a)->len,    \
     356                 :                     (pb) + (b)->pos, (b)->len,    \
     357                 :                     false)  /* compare the text of entries a and b, located at offset pos with length len from the bases pa and pb */
     358                 : 
     359                 : /*
     360                 :  * Add positions from src to dest after offsetting them by maxpos.
     361                 :  * Return the number added (might be less than expected due to overflow)
     362                 :  */
     363                 : static int32
     364 GIC           6 : add_pos(TSVector src, WordEntry *srcptr,
     365                 :         TSVector dest, WordEntry *destptr,
     366                 :         int32 maxpos)
     367 ECB             : {
     368 GIC           6 :     uint16     *clen = &_POSVECPTR(dest, destptr)->npos;  /* dest entry's position counter, updated in place */
     369                 :     int         i;
     370               6 :     uint16      slen = POSDATALEN(src, srcptr),
     371 ECB             :                 startlen;
     372 GIC           6 :     WordEntryPos *spos = POSDATAPTR(src, srcptr),
     373 CBC           6 :                *dpos = POSDATAPTR(dest, destptr);
     374                 : 
     375               6 :     if (!destptr->haspos)  /* dest has no positions yet: start its counter from zero */
     376 LBC           0 :         *clen = 0;
     377                 : 
     378 CBC           6 :     startlen = *clen;
     379 GBC           6 :     for (i = 0;
     380 GIC          12 :          i < slen && *clen < MAXNUMPOS &&  /* stop when src is exhausted or dest already holds MAXNUMPOS positions */
     381 CBC           6 :          (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);  /* ... or when dest's last position already equals MAXENTRYPOS - 1 */
     382               6 :          i++)
     383 ECB             :     {
     384 CBC           6 :         WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));  /* the weight is carried over unchanged */
     385               6 :         WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));  /* shifted position, passed through LIMITPOS */
     386 GIC           6 :         (*clen)++;
     387 ECB             :     }
     388                 : 
     389 CBC           6 :     if (*clen != startlen)  /* at least one position was appended */
     390 GIC           6 :         destptr->haspos = 1;
     391               6 :     return *clen - startlen;
     392 ECB             : }
     393                 : 
     394                 : /*
     395                 :  * Perform binary search of given lexeme in TSVector.
     396                 :  * Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
     397                 :  * found.
     398                 :  */
     399                 : static int
     400 GIC          99 : tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
     401                 : {
     402              99 :     WordEntry  *arrin = ARRPTR(tsv);
     403 CBC          99 :     int         StopLow = 0,
     404 GIC          99 :                 StopHigh = tsv->size,  /* the search range is [StopLow, StopHigh) */
     405 ECB             :                 StopMiddle,
     406                 :                 cmp;
     407                 : 
     408 GIC         261 :     while (StopLow < StopHigh)
     409                 :     {
     410             231 :         StopMiddle = (StopLow + StopHigh) / 2;
     411 ECB             : 
     412 GIC         231 :         cmp = tsCompareString(lexeme, lexeme_len,
     413 CBC         231 :                               STRPTR(tsv) + arrin[StopMiddle].pos,
     414 GIC         231 :                               arrin[StopMiddle].len,
     415 ECB             :                               false);
     416                 : 
     417 CBC         231 :         if (cmp < 0)
     418 GIC         108 :             StopHigh = StopMiddle;  /* the lexeme sorts before the probed entry: continue in the lower half */
     419             123 :         else if (cmp > 0)
     420 CBC          54 :             StopLow = StopMiddle + 1;  /* the lexeme sorts after the probed entry: continue in the upper half */
     421 ECB             :         else                    /* found it */
     422 CBC          69 :             return StopMiddle;
     423 ECB             :     }
     424                 : 
     425 CBC          30 :     return -1;
     426                 : }
     427                 : 
     428 ECB             : /*
     429                 :  * qsort comparator functions
     430                 :  */
     431                 : 
     432                 : static int
     433 GIC          39 : compare_int(const void *va, const void *vb)  /* comparator over int elements, ascending: returns -1, 0 or 1 */
     434                 : {
     435              39 :     int         a = *((const int *) va);
     436 CBC          39 :     int         b = *((const int *) vb);
     437                 : 
     438              39 :     if (a == b)
     439               6 :         return 0;
     440 GIC          33 :     return (a > b) ? 1 : -1;  /* explicit comparison instead of a subtraction */
     441 ECB             : }
     442                 : 
     443                 : static int
     444 GIC          51 : compare_text_lexemes(const void *va, const void *vb)  /* comparator over Datum elements, each read as a varlena value and ordered by tsCompareString */
     445                 : {
     446              51 :     Datum       a = *((const Datum *) va);
     447 CBC          51 :     Datum       b = *((const Datum *) vb);
     448 GIC          51 :     char       *alex = VARDATA_ANY(a);  /* payload pointer and length are taken with the _ANY accessors */
     449 CBC          51 :     int         alex_len = VARSIZE_ANY_EXHDR(a);
     450              51 :     char       *blex = VARDATA_ANY(b);
     451              51 :     int         blex_len = VARSIZE_ANY_EXHDR(b);
     452 ECB             : 
     453 CBC          51 :     return tsCompareString(alex, alex_len, blex, blex_len, false);
     454 ECB             : }
     455                 : 
     456                 : /*
     457                 :  * Internal routine to delete lexemes from TSVector by array of offsets.
     458                 :  *
     459                 :  * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
     460                 :  * int indices_count -- size of that array
     461                 :  *
     462                 :  * Returns new TSVector without given lexemes along with their positions
     463                 :  * and weights.
     464                 :  */
     465                 : static TSVector
     466 GIC          33 : tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
     467                 :                            int indices_count)
     468                 : {
     469 ECB             :     TSVector    tsout;
     470 GIC          33 :     WordEntry  *arrin = ARRPTR(tsv),
     471                 :                *arrout;
     472              33 :     char       *data = STRPTR(tsv),
     473 ECB             :                *dataout;
     474                 :     int         i,              /* index in arrin */
     475                 :                 j,              /* index in arrout */
     476                 :                 k,              /* index in indices_to_delete */
     477                 :                 curoff;         /* index in dataout area */
     478                 : 
     479                 :     /*
     480                 :      * Sort the filter array to simplify membership checks below.  Also, get
     481                 :      * rid of any duplicate entries, so that we can assume that indices_count
     482                 :      * is exactly equal to the number of lexemes that will be removed.
     483                 :      */
     484 GIC          33 :     if (indices_count > 1)  /* zero or one index needs neither sorting nor de-duplication */
     485                 :     {
     486              15 :         qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
     487 CBC          15 :         indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
     488                 :                                 compare_int);
     489 ECB             :     }
     490                 : 
     491                 :     /*
     492                 :      * Here we overestimate tsout size, since we don't know how much space is
     493                 :      * used by the deleted lexeme(s).  We will set exact size below.
     494                 :      */
     495 GIC          33 :     tsout = (TSVector) palloc0(VARSIZE(tsv));
     496                 : 
     497                 :     /* This count must be correct because STRPTR(tsout) relies on it. */
     498 CBC          33 :     tsout->size = tsv->size - indices_count;
     499                 : 
     500                 :     /*
     501 ECB             :      * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
     502                 :      */
     503 GIC          33 :     arrout = ARRPTR(tsout);
     504              33 :     dataout = STRPTR(tsout);
     505              33 :     curoff = 0;
     506 CBC         198 :     for (i = j = k = 0; i < tsv->size; i++)
     507 ECB             :     {
     508                 :         /*
     509                 :          * If current i is present in indices_to_delete, skip this lexeme.
     510                 :          * Since indices_to_delete is already sorted, we only need to check
     511                 :          * the current (k'th) entry.
     512                 :          */
     513 GIC         165 :         if (k < indices_count && i == indices_to_delete[k])
     514                 :         {
     515              48 :             k++;
     516 CBC          48 :             continue;
     517                 :         }
     518 ECB             : 
     519                 :         /* Copy lexeme and its positions and weights */
     520 GIC         117 :         memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
     521             117 :         arrout[j].haspos = arrin[i].haspos;
     522             117 :         arrout[j].len = arrin[i].len;
     523 CBC         117 :         arrout[j].pos = curoff;
     524             117 :         curoff += arrin[i].len;
     525             117 :         if (arrin[i].haspos)
     526 ECB             :         {
     527 CBC          78 :             int         len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
     528              78 :             + sizeof(uint16);  /* bytes to copy: one uint16 plus POSDATALEN WordEntryPos items */
     529                 : 
     530              78 :             curoff = SHORTALIGN(curoff);  /* SHORTALIGN is applied on the output side just as on the input side below */
     531              78 :             memcpy(dataout + curoff,
     532 GIC          78 :                    STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
     533 ECB             :                    len);
     534 CBC          78 :             curoff += len;
     535 ECB             :         }
     536                 : 
     537 CBC         117 :         j++;
     538                 :     }
     539                 : 
     540 ECB             :     /*
     541                 :      * k should now be exactly equal to indices_count. If it isn't then the
     542                 :      * caller provided us with indices outside of [0, tsv->size) range and
     543                 :      * estimation of tsout's size is wrong.
     544                 :      */
     545 GIC          33 :     Assert(k == indices_count);
     546                 : 
     547              33 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));  /* set the exact size now that curoff is final */
     548 CBC          33 :     return tsout;
     549                 : }
     550 ECB             : 
     551                 : /*
     552                 :  * Delete given lexeme from tsvector.
     553                 :  * Implementation of user-level ts_delete(tsvector, text).
     554                 :  */
     555                 : Datum
     556 GIC          18 : tsvector_delete_str(PG_FUNCTION_ARGS)
     557                 : {
     558              18 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     559 ECB             :                 tsout;
     560 GIC          18 :     text       *tlexeme = PG_GETARG_TEXT_PP(1);
     561 CBC          18 :     char       *lexeme = VARDATA_ANY(tlexeme);
     562 GIC          18 :     int         lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
     563 ECB             :                 skip_index;
     564                 : 
     565 CBC          18 :     if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
     566 GIC           6 :         PG_RETURN_POINTER(tsin);  /* lexeme not present: hand the input back as the result */
     567                 : 
     568 CBC          12 :     tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);  /* delete the single entry that was found */
     569 ECB             : 
     570 GIC          12 :     PG_FREE_IF_COPY(tsin, 0);
     571 CBC          12 :     PG_FREE_IF_COPY(tlexeme, 1);
     572 GIC          12 :     PG_RETURN_POINTER(tsout);
     573 ECB             : }
     574                 : 
     575                 : /*
     576                 :  * Delete the given array of lexemes from a tsvector.
     577                 :  * Implementation of user-level ts_delete(tsvector, text[]).
                         :  *
                         :  * Argument 0 is the input tsvector, argument 1 a text array.  Null
                         :  * array elements are skipped.  Every other element is looked up with
                         :  * tsvector_bsearch(); results >= 0 are collected in skip_indices and
                         :  * handed to tsvector_delete_by_indices() in a single call.
                         :  *
                         :  * NOTE(review): a lexeme that occurs twice in the array puts the same
                         :  * index into skip_indices twice; presumably
                         :  * tsvector_delete_by_indices() copes with duplicates -- confirm there.
     578                 :  */
     579                 : Datum
     580 GIC          21 : tsvector_delete_arr(PG_FUNCTION_ARGS)
     581                 : {
     582              21 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     583 ECB             :                 tsout;
     584 GIC          21 :     ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
     585 ECB             :     int         i,
     586                 :                 nlex,
     587                 :                 skip_count,
     588                 :                *skip_indices;
     589                 :     Datum      *dlexemes;
     590                 :     bool       *nulls;
     591                 : 
     592 GNC          21 :     deconstruct_array_builtin(lexemes, TEXTOID, &dlexemes, &nulls, &nlex);
     593                 : 
     594 ECB             :     /*
     595                 :      * In the typical use case the array of lexemes to delete is relatively
     596                 :      * small, so we optimize for that scenario: walk the array and do a
     597                 :      * binary search in the tsvector for each of its lexemes.
     598                 :      */
     599 GIC          21 :     skip_indices = palloc0(nlex * sizeof(int));  /* room for one index per array element */
     600              84 :     for (i = skip_count = 0; i < nlex; i++)
     601 ECB             :     {
     602                 :         char       *lex;
     603                 :         int         lex_len,
     604                 :                     lex_pos;
     605                 : 
     606                 :         /* Ignore null array elements, they surely don't match */
     607 GIC          63 :         if (nulls[i])
     608               3 :             continue;
     609 ECB             : 
     610 CBC          60 :         lex = VARDATA(dlexemes[i]);  /* NOTE(review): assumes a full-size varlena header on each element -- confirm */
     611 GIC          60 :         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     612 CBC          60 :         lex_pos = tsvector_bsearch(tsin, lex, lex_len);
     613 ECB             : 
     614 CBC          60 :         if (lex_pos >= 0)  /* only non-negative results are remembered for deletion */
     615 GIC          39 :             skip_indices[skip_count++] = lex_pos;
     616 ECB             :     }
     617                 : 
     618 GIC          21 :     tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
     619                 : 
     620 CBC          21 :     pfree(skip_indices);
     621 GIC          21 :     PG_FREE_IF_COPY(tsin, 0);
     622 CBC          21 :     PG_FREE_IF_COPY(lexemes, 1);
     623 ECB             : 
     624 CBC          21 :     PG_RETURN_POINTER(tsout);
     625                 : }
     626 ECB             : 
     627                 : /*
     628                 :  * Expand a tsvector into a set of rows, one per lexeme, with these columns:
     629                 :  *     lexeme: lexeme text
     630                 :  *     positions: int2 array of lexeme positions
     631                 :  *     weights: text array of one-letter weights, parallel to positions
                         :  *
                         :  * positions and weights are both NULL for a lexeme that carries no
                         :  * position data.
                         :  *
                         :  * Set-returning function: the first call stores the row descriptor and
                         :  * a copy of argument 0 in funcctx; every call then emits the row for
                         :  * entry number funcctx->call_cntr until that counter reaches
                         :  * tsin->size.
     632                 :  */
     633                 : Datum
     634 GIC          90 : tsvector_unnest(PG_FUNCTION_ARGS)
     635                 : {
     636 ECB             :     FuncCallContext *funcctx;
     637                 :     TSVector    tsin;
     638                 : 
     639 GIC          90 :     if (SRF_IS_FIRSTCALL())
     640                 :     {
     641 ECB             :         MemoryContext oldcontext;
     642                 :         TupleDesc   tupdesc;
     643                 : 
     644 GIC          15 :         funcctx = SRF_FIRSTCALL_INIT();
     645              15 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);  /* allocations below are made in the multi-call context */
     646 ECB             : 
     647 CBC          15 :         tupdesc = CreateTemplateTupleDesc(3);
     648 GIC          15 :         TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
     649 ECB             :                            TEXTOID, -1, 0);
     650 CBC          15 :         TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
     651                 :                            INT2ARRAYOID, -1, 0);
     652              15 :         TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
     653                 :                            TEXTARRAYOID, -1, 0);
     654 GNC          15 :         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)  /* receives &tupdesc, so it can substitute the descriptor built above */
     655 UNC           0 :             elog(ERROR, "return type must be a row type");
     656 GNC          15 :         funcctx->tuple_desc = tupdesc;
     657                 : 
     658 CBC          15 :         funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);  /* later calls read the vector back from user_fctx */
     659 EUB             : 
     660 CBC          15 :         MemoryContextSwitchTo(oldcontext);
     661                 :     }
     662 ECB             : 
     663 GIC          90 :     funcctx = SRF_PERCALL_SETUP();
     664 CBC          90 :     tsin = (TSVector) funcctx->user_fctx;
     665                 : 
     666 GIC          90 :     if (funcctx->call_cntr < tsin->size)  /* still entries left to report */
     667 ECB             :     {
     668 CBC          75 :         WordEntry  *arrin = ARRPTR(tsin);
     669 GIC          75 :         char       *data = STRPTR(tsin);
     670 ECB             :         HeapTuple   tuple;
     671                 :         int         j,
     672 CBC          75 :                     i = funcctx->call_cntr;  /* index of the entry emitted by this call */
     673              75 :         bool        nulls[] = {false, false, false};
     674                 :         Datum       values[3];
     675                 : 
     676              75 :         values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
     677 ECB             : 
     678 GIC          75 :         if (arrin[i].haspos)
     679                 :         {
     680 ECB             :             WordEntryPosVector *posv;
     681                 :             Datum      *positions;
     682                 :             Datum      *weights;
     683                 :             char        weight;
     684                 : 
     685                 :             /*
     686                 :              * Internally tsvector stores position and weight in the same
     687                 :              * uint16 (2 bits for weight, 14 for position). Here we split
     688                 :              * them into two separate, parallel arrays.
     689                 :              */
     690 GIC          45 :             posv = _POSVECPTR(tsin, arrin + i);
     691              45 :             positions = palloc(posv->npos * sizeof(Datum));
     692              45 :             weights = palloc(posv->npos * sizeof(Datum));
     693             126 :             for (j = 0; j < posv->npos; j++)
     694 ECB             :             {
     695 CBC          81 :                 positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
     696              81 :                 weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);  /* weight value 0 gives 'D', 3 gives 'A' */
     697              81 :                 weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
     698                 :                                                                       1));
     699 ECB             :             }
     700                 : 
     701 GNC          45 :             values[1] = PointerGetDatum(construct_array_builtin(positions, posv->npos, INT2OID));
     702              45 :             values[2] = PointerGetDatum(construct_array_builtin(weights, posv->npos, TEXTOID));
     703 ECB             :         }
     704                 :         else
     705                 :         {
     706 GIC          30 :             nulls[1] = nulls[2] = true;  /* no position data: both array columns are NULL */
     707                 :         }
     708 ECB             : 
     709 GIC          75 :         tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
     710              75 :         SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
     711 ECB             :     }
     712                 :     else
     713                 :     {
     714 GIC          15 :         SRF_RETURN_DONE(funcctx);
     715                 :     }
     716 ECB             : }
     717                 : 
     718                 : /*
     719                 :  * Convert a tsvector to an array of its lexemes.
                         :  *
                         :  * Returns a text array with one element per WordEntry of argument 0,
                         :  * in the order the entries are stored; position and weight data are
                         :  * not part of the result.
     720                 :  */
     721                 : Datum
     722 GIC           6 : tsvector_to_array(PG_FUNCTION_ARGS)
     723                 : {
     724 CBC           6 :     TSVector    tsin = PG_GETARG_TSVECTOR(0);
     725 GIC           6 :     WordEntry  *arrin = ARRPTR(tsin);
     726 ECB             :     Datum      *elements;
     727                 :     int         i;
     728                 :     ArrayType  *array;
     729                 : 
     730 GIC           6 :     elements = palloc(tsin->size * sizeof(Datum));
     731                 : 
     732 CBC          36 :     for (i = 0; i < tsin->size; i++)
     733                 :     {
     734              30 :         elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,  /* lexeme bytes start at offset pos within STRPTR */
     735 GIC          30 :                                                                arrin[i].len));
     736 ECB             :     }
     737                 : 
     738 GNC           6 :     array = construct_array_builtin(elements, tsin->size, TEXTOID);
     739                 : 
     740 CBC           6 :     pfree(elements);  /* frees only the Datum array; the text values it pointed to are not freed here */
     741 GIC           6 :     PG_FREE_IF_COPY(tsin, 0);
     742 CBC           6 :     PG_RETURN_POINTER(array);
     743 ECB             : }
     744                 : 
     745                 : /*
     746                 :  * Build a tsvector from an array of lexemes.
                         :  *
                         :  * Argument 0 is a text array.  A null element or a zero-length string
                         :  * raises an error.  The remaining lexemes are sorted and de-duplicated
                         :  * with compare_text_lexemes, then copied into a new tsvector whose
                         :  * entries all have haspos = 0 (no position data).
     747                 :  */
     748                 : Datum
     749 GIC          12 : array_to_tsvector(PG_FUNCTION_ARGS)
     750                 : {
     751 CBC          12 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
     752                 :     TSVector    tsout;
     753 ECB             :     Datum      *dlexemes;
     754                 :     WordEntry  *arrout;
     755                 :     bool       *nulls;
     756                 :     int         nitems,
     757                 :                 i,
     758                 :                 tslen,
     759 GIC          12 :                 datalen = 0;
     760                 :     char       *cur;
     761 ECB             : 
     762 GNC          12 :     deconstruct_array_builtin(v, TEXTOID, &dlexemes, &nulls, &nitems);
     763                 : 
     764 ECB             :     /*
     765                 :      * Reject nulls and zero-length strings (maybe we should just ignore
     766                 :      * them instead?)
     767                 :      */
     768 GIC          63 :     for (i = 0; i < nitems; i++)
     769                 :     {
     770 CBC          57 :         if (nulls[i])
     771 GIC           3 :             ereport(ERROR,
     772 ECB             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     773                 :                      errmsg("lexeme array may not contain nulls")));
     774                 : 
     775 GIC          54 :         if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)  /* payload length of this element is zero */
     776               3 :             ereport(ERROR,
     777 ECB             :                     (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
     778                 :                      errmsg("lexeme array may not contain empty strings")));
     779                 :     }
     780                 : 
     781                 :     /* Sort and de-dup, because this is required for a valid tsvector. */
     782 GIC           6 :     if (nitems > 1)  /* zero or one lexeme is already sorted and unique */
     783                 :     {
     784 CBC           6 :         qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
     785 GIC           6 :         nitems = qunique(dlexemes, nitems, sizeof(Datum),
     786 ECB             :                          compare_text_lexemes);
     787                 :     }
     788                 : 
     789                 :     /* Calculate space needed for surviving lexemes. */
     790 GIC          30 :     for (i = 0; i < nitems; i++)
     791              24 :         datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
     792 CBC           6 :     tslen = CALCDATASIZE(nitems, datalen);
     793 ECB             : 
     794                 :     /* Allocate and fill tsvector. */
     795 GIC           6 :     tsout = (TSVector) palloc0(tslen);
     796               6 :     SET_VARSIZE(tsout, tslen);
     797 CBC           6 :     tsout->size = nitems;  /* set before ARRPTR/STRPTR are taken below */
     798 ECB             : 
     799 CBC           6 :     arrout = ARRPTR(tsout);
     800 GIC           6 :     cur = STRPTR(tsout);
     801 CBC          30 :     for (i = 0; i < nitems; i++)
     802 ECB             :     {
     803 CBC          24 :         char       *lex = VARDATA(dlexemes[i]);
     804 GIC          24 :         int         lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
     805 ECB             : 
     806 CBC          24 :         memcpy(cur, lex, lex_len);
     807 GIC          24 :         arrout[i].haspos = 0;  /* lexemes built from an array carry no positions */
     808 CBC          24 :         arrout[i].len = lex_len;
     809              24 :         arrout[i].pos = cur - STRPTR(tsout);  /* offset of this lexeme within the string area */
     810              24 :         cur += lex_len;  /* lexemes are packed back to back */
     811 ECB             :     }
     812                 : 
     813 GIC           6 :     PG_FREE_IF_COPY(v, 0);
     814               6 :     PG_RETURN_POINTER(tsout);
     815 ECB             : }
     816                 : 
     817                 : /*
     818                 :  * ts_filter(): keep only lexemes with given weights in tsvector.
                         :  *
                         :  * Argument 0 is the input tsvector, argument 1 a "char" array of weight
                         :  * letters.  'A'..'D' are accepted in either case; a null element or any
                         :  * other character raises an error.
                         :  *
                         :  * The letters are folded into a 4-bit mask ('A' = 8, 'B' = 4, 'C' = 2,
                         :  * 'D' = 1) and a position is kept when bit WEP_GETWEIGHT(position) is
                         :  * set in that mask.  A lexeme is dropped from the result when it has no
                         :  * position data at all or when none of its positions is kept.
     819                 :  */
     820                 : Datum
     821 GIC           9 : tsvector_filter(PG_FUNCTION_ARGS)
     822                 : {
     823 CBC           9 :     TSVector    tsin = PG_GETARG_TSVECTOR(0),
     824                 :                 tsout;
     825               9 :     ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
     826 GIC           9 :     WordEntry  *arrin = ARRPTR(tsin),
     827 ECB             :                *arrout;
     828 CBC           9 :     char       *datain = STRPTR(tsin),
     829                 :                *dataout;
     830 ECB             :     Datum      *dweights;
     831                 :     bool       *nulls;
     832                 :     int         nweights;
     833                 :     int         i,
     834                 :                 j;
     835 GIC           9 :     int         cur_pos = 0;
     836               9 :     char        mask = 0;
     837 ECB             : 
     838 GNC           9 :     deconstruct_array_builtin(weights, CHAROID, &dweights, &nulls, &nweights);
     839 ECB             : 
     840 GIC          21 :     for (i = 0; i < nweights; i++)
     841 ECB             :     {
     842                 :         char        char_weight;
     843                 : 
     844 GIC          15 :         if (nulls[i])
     845 CBC           3 :             ereport(ERROR,
     846 ECB             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
     847                 :                      errmsg("weight array may not contain nulls")));
     848                 : 
     849 GIC          12 :         char_weight = DatumGetChar(dweights[i]);
     850 CBC          12 :         switch (char_weight)
     851 ECB             :         {
     852 GIC           9 :             case 'A':
     853 ECB             :             case 'a':
     854 GIC           9 :                 mask = mask | 8;  /* bit 3 */
     855 CBC           9 :                 break;
     856               3 :             case 'B':
     857 ECB             :             case 'b':
     858 GIC           3 :                 mask = mask | 4;  /* bit 2 */
     859 CBC           3 :                 break;
     860 LBC           0 :             case 'C':
     861 EUB             :             case 'c':
     862 UIC           0 :                 mask = mask | 2;  /* bit 1 */
     863 UBC           0 :                 break;
     864               0 :             case 'D':
     865 EUB             :             case 'd':
     866 UIC           0 :                 mask = mask | 1;  /* bit 0 */
     867 UBC           0 :                 break;
     868               0 :             default:
     869               0 :                 ereport(ERROR,
     870 EUB             :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     871                 :                          errmsg("unrecognized weight: \"%c\"", char_weight)));
     872                 :         }
     873                 :     }
     874                 : 
     875 GIC           6 :     tsout = (TSVector) palloc0(VARSIZE(tsin));  /* sized like the input; the loop below only drops entries and positions */
     876 CBC           6 :     tsout->size = tsin->size;  /* provisional entry count, so STRPTR(tsout) leaves room for every input entry */
     877               6 :     arrout = ARRPTR(tsout);
     878               6 :     dataout = STRPTR(tsout);
     879 ECB             : 
     880 GIC          54 :     for (i = j = 0; i < tsin->size; i++)  /* i walks the input entries, j counts the entries kept */
     881 ECB             :     {
     882                 :         WordEntryPosVector *posvin,
     883                 :                    *posvout;
     884 GIC          48 :         int         npos = 0;
     885 ECB             :         int         k;
     886                 : 
     887 GIC          48 :         if (!arrin[i].haspos)  /* no positions means no weights to match: drop the lexeme */
     888 CBC          15 :             continue;
     889 ECB             : 
     890 GIC          33 :         posvin = _POSVECPTR(tsin, arrin + i);
     891 CBC          33 :         posvout = (WordEntryPosVector *)  /* where the position vector would sit if this lexeme is kept */
     892              33 :             (dataout + SHORTALIGN(cur_pos + arrin[i].len));
     893 ECB             : 
     894 GIC          66 :         for (k = 0; k < posvin->npos; k++)
     895 ECB             :         {
     896 GIC          33 :             if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
     897 CBC          15 :                 posvout->pos[npos++] = posvin->pos[k];
     898 ECB             :         }
     899                 : 
     900                 :         /* if no satisfactory positions found, skip lexeme */
     901 GIC          33 :         if (!npos)  /* cur_pos was not advanced, so the next lexeme reuses this space */
     902 CBC          18 :             continue;
     903 ECB             : 
     904 GIC          15 :         arrout[j].haspos = true;
     905 CBC          15 :         arrout[j].len = arrin[i].len;
     906              15 :         arrout[j].pos = cur_pos;
     907 ECB             : 
     908 GIC          15 :         memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
     909 CBC          15 :         posvout->npos = npos;
     910              15 :         cur_pos += SHORTALIGN(arrin[i].len);  /* lexeme bytes plus alignment padding */
     911              15 :         cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +  /* kept positions plus the uint16 count */
     912 ECB             :             sizeof(uint16);
     913 GIC          15 :         j++;
     914 ECB             :     }
     915                 : 
     916 GIC           6 :     tsout->size = j;  /* final entry count; this changes what STRPTR(tsout) yields */
     917 CBC           6 :     if (dataout != STRPTR(tsout))
     918               6 :         memmove(STRPTR(tsout), dataout, cur_pos);  /* slide the string area down to follow the shorter entry array */
     919 ECB             : 
     920 GIC           6 :     SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
     921 ECB             : 
     922 GIC           6 :     PG_FREE_IF_COPY(tsin, 0);
     923 CBC           6 :     PG_RETURN_POINTER(tsout);
     924 ECB             : }
     925                 : /*
                         :  * tsvector_concat: combine two tsvectors into one.
                         :  *
                         :  * Arguments 0 and 1 are the input vectors in1 and in2.  Their entry
                         :  * arrays are merged in compareEntry() order; a lexeme present in both
                         :  * inputs yields a single output entry.  in1's position data is copied
                         :  * as is, while in2's position data goes through add_pos() together
                         :  * with the largest position found in in1, which is used to offset
                         :  * in2's positions.
                         :  *
                         :  * NOTE(review): add_pos() is not visible in this part of the file; its
                         :  * return value is used here as the number of positions appended (zero
                         :  * meaning none) -- confirm against its definition.
                         :  *
                         :  * Raises an error when the combined string area exceeds MAXSTRPOS.
                         :  */
     926                 : Datum
     927 GIC           6 : tsvector_concat(PG_FUNCTION_ARGS)
     928 ECB             : {
     929 GIC           6 :     TSVector    in1 = PG_GETARG_TSVECTOR(0);
     930 CBC           6 :     TSVector    in2 = PG_GETARG_TSVECTOR(1);
     931 ECB             :     TSVector    out;
     932                 :     WordEntry  *ptr;
     933                 :     WordEntry  *ptr1,
     934                 :                *ptr2;
     935                 :     WordEntryPos *p;
     936 GIC           6 :     int         maxpos = 0,
     937 ECB             :                 i,
     938                 :                 j,
     939                 :                 i1,
     940                 :                 i2,
     941                 :                 dataoff,
     942                 :                 output_bytes,
     943                 :                 output_size;
     944                 :     char       *data,
     945                 :                *data1,
     946                 :                *data2;
     947                 : 
     948                 :     /* Get max position in in1; we'll need this to offset in2's positions */
     949 GIC           6 :     ptr = ARRPTR(in1);
     950 CBC           6 :     i = in1->size;
     951              15 :     while (i--)
     952 ECB             :     {
     953 GIC           9 :         if ((j = POSDATALEN(in1, ptr)) != 0)
     954 ECB             :         {
     955 GIC           9 :             p = POSDATAPTR(in1, ptr);
     956 CBC          18 :             while (j--)
     957 ECB             :             {
     958 GIC           9 :                 if (WEP_GETPOS(*p) > maxpos)
     959 CBC           6 :                     maxpos = WEP_GETPOS(*p);
     960               9 :                 p++;
     961 ECB             :             }
     962                 :         }
     963 GIC           9 :         ptr++;
     964 ECB             :     }
     965                 : 
     966 GIC           6 :     ptr1 = ARRPTR(in1);
     967 CBC           6 :     ptr2 = ARRPTR(in2);
     968               6 :     data1 = STRPTR(in1);
     969               6 :     data2 = STRPTR(in2);
     970               6 :     i1 = in1->size;  /* entries of in1 still to be merged */
     971               6 :     i2 = in2->size;  /* entries of in2 still to be merged */
     972 ECB             : 
     973                 :     /*
     974                 :      * Conservative estimate of space needed.  We might need all the data in
     975                 :      * both inputs, and conceivably add a pad byte before position data for
     976                 :      * each item where there was none before.
     977                 :      */
     978 GIC           6 :     output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
     979 ECB             : 
     980 GIC           6 :     out = (TSVector) palloc0(output_bytes);
     981 CBC           6 :     SET_VARSIZE(out, output_bytes);
     982 ECB             : 
     983                 :     /*
     984                 :      * We must make out->size valid so that STRPTR(out) is sensible.  We'll
     985                 :      * collapse out any unused space at the end.
     986                 :      */
     987 GIC           6 :     out->size = in1->size + in2->size;
     988 ECB             : 
     989 GIC           6 :     ptr = ARRPTR(out);
     990 CBC           6 :     data = STRPTR(out);
     991               6 :     dataoff = 0;  /* write offset within the output string area */
     992              15 :     while (i1 && i2)  /* merge step: both inputs still have entries */
     993 ECB             :     {
     994 GIC           9 :         int         cmp = compareEntry(data1, ptr1, data2, ptr2);
     995 ECB             : 
     996 GIC           9 :         if (cmp < 0)
     997 ECB             :         {                       /* in1 first */
     998 GIC           3 :             ptr->haspos = ptr1->haspos;
     999 CBC           3 :             ptr->len = ptr1->len;
    1000               3 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1001               3 :             ptr->pos = dataoff;
    1002               3 :             dataoff += ptr1->len;
    1003               3 :             if (ptr->haspos)
    1004 ECB             :             {
    1005 GIC           3 :                 dataoff = SHORTALIGN(dataoff);  /* position vector starts on a short-aligned offset */
    1006 CBC           3 :                 memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));  /* in1's positions are copied unchanged */
    1007               3 :                 dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1008 ECB             :             }
    1009                 : 
    1010 GIC           3 :             ptr++;
    1011 CBC           3 :             ptr1++;
    1012               3 :             i1--;
    1013 ECB             :         }
    1014 GIC           6 :         else if (cmp > 0)
    1015 ECB             :         {                       /* in2 first */
    1016 GIC           3 :             ptr->haspos = ptr2->haspos;
    1017 CBC           3 :             ptr->len = ptr2->len;
    1018               3 :             memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1019               3 :             ptr->pos = dataoff;
    1020               3 :             dataoff += ptr2->len;
    1021               3 :             if (ptr->haspos)
    1022 ECB             :             {
    1023 UIC           0 :                 int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1024 EUB             : 
    1025 UIC           0 :                 if (addlen == 0)  /* nothing was appended: output entry ends up without positions */
    1026 UBC           0 :                     ptr->haspos = 0;
    1027 EUB             :                 else
    1028                 :                 {
    1029 UIC           0 :                     dataoff = SHORTALIGN(dataoff);
    1030 UBC           0 :                     dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1031 EUB             :                 }
    1032                 :             }
    1033                 : 
    1034 GIC           3 :             ptr++;
    1035 CBC           3 :             ptr2++;
    1036               3 :             i2--;
    1037 ECB             :         }
    1038                 :         else
    1039                 :         {                       /* same lexeme in both inputs: emit one merged entry */
    1040 GIC           3 :             ptr->haspos = ptr1->haspos | ptr2->haspos;
    1041 CBC           3 :             ptr->len = ptr1->len;
    1042               3 :             memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1043               3 :             ptr->pos = dataoff;
    1044               3 :             dataoff += ptr1->len;
    1045               3 :             if (ptr->haspos)
    1046 ECB             :             {
    1047 GIC           3 :                 if (ptr1->haspos)
    1048 ECB             :                 {
    1049 GIC           3 :                     dataoff = SHORTALIGN(dataoff);
    1050 CBC           3 :                     memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1051               3 :                     dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1052               3 :                     if (ptr2->haspos)
    1053               3 :                         dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);  /* no extra uint16: the count field was already accounted for with in1's data */
    1054 ECB             :                 }
    1055                 :                 else            /* must have ptr2->haspos */
    1056                 :                 {
    1057 UIC           0 :                     int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1058 EUB             : 
    1059 UIC           0 :                     if (addlen == 0)
    1060 UBC           0 :                         ptr->haspos = 0;
    1061 EUB             :                     else
    1062                 :                     {
    1063 UIC           0 :                         dataoff = SHORTALIGN(dataoff);
    1064 UBC           0 :                         dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1065 EUB             :                     }
    1066                 :                 }
    1067                 :             }
    1068                 : 
    1069 GIC           3 :             ptr++;
    1070 CBC           3 :             ptr1++;
    1071               3 :             ptr2++;
    1072               3 :             i1--;
    1073               3 :             i2--;
    1074 ECB             :         }
    1075                 :     }
    1076                 : 
    1077 GIC           9 :     while (i1)  /* in2 is exhausted: copy the rest of in1 */
    1078 ECB             :     {
    1079 GIC           3 :         ptr->haspos = ptr1->haspos;
    1080 CBC           3 :         ptr->len = ptr1->len;
    1081               3 :         memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
    1082               3 :         ptr->pos = dataoff;
    1083               3 :         dataoff += ptr1->len;
    1084               3 :         if (ptr->haspos)
    1085 ECB             :         {
    1086 GIC           3 :             dataoff = SHORTALIGN(dataoff);
    1087 CBC           3 :             memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
    1088               3 :             dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
    1089 ECB             :         }
    1090                 : 
    1091 GIC           3 :         ptr++;
    1092 CBC           3 :         ptr1++;
    1093               3 :         i1--;
    1094 ECB             :     }
    1095                 : 
    1096 GIC           9 :     while (i2)  /* in1 is exhausted: copy the rest of in2, passing its positions through add_pos() */
    1097 ECB             :     {
    1098 GIC           3 :         ptr->haspos = ptr2->haspos;
    1099 CBC           3 :         ptr->len = ptr2->len;
    1100               3 :         memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
    1101               3 :         ptr->pos = dataoff;
    1102               3 :         dataoff += ptr2->len;
    1103               3 :         if (ptr->haspos)
    1104 ECB             :         {
    1105 GIC           3 :             int         addlen = add_pos(in2, ptr2, out, ptr, maxpos);
    1106 ECB             : 
    1107 GIC           3 :             if (addlen == 0)
    1108 LBC           0 :                 ptr->haspos = 0;
    1109 EUB             :             else
    1110                 :             {
    1111 GIC           3 :                 dataoff = SHORTALIGN(dataoff);
    1112 CBC           3 :                 dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
    1113 ECB             :             }
    1114                 :         }
    1115                 : 
    1116 GIC           3 :         ptr++;
    1117 CBC           3 :         ptr2++;
    1118               3 :         i2--;
    1119 ECB             :     }
    1120                 : 
    1121                 :     /*
    1122                 :      * Instead of checking each offset individually, we check for overflow of
    1123                 :      * pos fields once at the end.
    1124                 :      */
    1125 GIC           6 :     if (dataoff > MAXSTRPOS)
    1126 LBC           0 :         ereport(ERROR,
    1127 EUB             :                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1128                 :                  errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
    1129                 : 
    1130                 :     /*
    1131                 :      * Adjust sizes (asserting that we didn't overrun the original estimates)
    1132                 :      * and collapse out any unused array entries.
    1133                 :      */
    1134 GIC           6 :     output_size = ptr - ARRPTR(out);  /* number of entries actually written */
    1135 CBC           6 :     Assert(output_size <= out->size);
    1136               6 :     out->size = output_size;
    1137               6 :     if (data != STRPTR(out))  /* fewer entries than estimated: the string area must move down */
    1138               3 :         memmove(STRPTR(out), data, dataoff);
    1139               6 :     output_bytes = CALCDATASIZE(out->size, dataoff);
    1140               6 :     Assert(output_bytes <= VARSIZE(out));
    1141               6 :     SET_VARSIZE(out, output_bytes);
    1142 ECB             : 
    1143 GIC           6 :     PG_FREE_IF_COPY(in1, 0);
    1144 CBC           6 :     PG_FREE_IF_COPY(in2, 1);
    1145               6 :     PG_RETURN_POINTER(out);
    1146 ECB             : }
    1147                 : 
    1148                 : /*
    1149                 :  * Compare two strings by tsvector rules.
    1150                 :  *
    1151                 :  * If prefix = true, the result is zero iff a is a prefix of b.
                         :  *
                         :  * a/lena and b/lenb are byte strings with explicit lengths.  The result
                         :  * is negative, zero or positive in the manner of memcmp().
                         :  *
                         :  * With prefix = false the strings are compared bytewise over their
                         :  * common length, and when that part is equal the shorter string sorts
                         :  * first.  With prefix = true an empty a matches anything; otherwise the
                         :  * memcmp() result over the common length is returned, except that it
                         :  * becomes 1 when the common part is equal but a is longer than b.
    1152                 :  */
    1153                 : int32
    1154 GIC     3107679 : tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
    1155 ECB             : {
    1156                 :     int         cmp;
    1157                 : 
    1158 GIC     3107679 :     if (lena == 0)
    1159 ECB             :     {
    1160 GIC          18 :         if (prefix)
    1161 LBC           0 :             cmp = 0;            /* empty string is prefix of anything */
    1162 EUB             :         else
    1163 GIC          18 :             cmp = (lenb > 0) ? -1 : 0;  /* empty a sorts before any non-empty b */
    1164 ECB             :     }
    1165 GIC     3107661 :     else if (lenb == 0)
    1166 ECB             :     {
    1167 UIC           0 :         cmp = (lena > 0) ? 1 : 0;  /* lena is nonzero on this branch, so a positive lena gives 1 */
    1168 EUB             :     }
    1169                 :     else
    1170                 :     {
    1171 GIC     3107661 :         cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));  /* compare only the common length */
    1172 ECB             : 
    1173 GIC     3107661 :         if (prefix)
    1174 ECB             :         {
    1175 GIC        8229 :             if (cmp == 0 && lena > lenb)
    1176 LBC           0 :                 cmp = 1;        /* a is longer, so not a prefix of b */
    1177 EUB             :         }
    1178 GIC     3099432 :         else if (cmp == 0 && lena != lenb)
    1179 ECB             :         {
    1180 GIC       16134 :             cmp = (lena < lenb) ? -1 : 1;  /* equal common part: the shorter string sorts first */
    1181 ECB             :         }
    1182                 :     }
    1183                 : 
    1184 GIC     3107679 :     return cmp;
    1185 ECB             : }
    1186                 : 
    1187                 : /*
    1188                 :  * Check weight info or/and fill 'data' with the required positions
    1189                 :  */
    1190                 : static TSTernaryValue
    1191 GIC       34041 : checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
    1192 ECB             :                ExecPhraseData *data)
    1193                 : {
    1194 GIC       34041 :     TSTernaryValue result = TS_NO;
    1195 ECB             : 
    1196 GIC       34041 :     Assert(data == NULL || data->npos == 0);
    1197 ECB             : 
    1198 GIC       34041 :     if (entry->haspos)
    1199 ECB             :     {
    1200                 :         WordEntryPosVector *posvec;
    1201                 : 
    1202                 :         /*
    1203                 :          * We can't use the _POSVECPTR macro here because the pointer to the
    1204                 :          * tsvector's lexeme storage is already contained in chkval->values.
    1205                 :          */
    1206 GIC        2244 :         posvec = (WordEntryPosVector *)
    1207 CBC        2244 :             (chkval->values + SHORTALIGN(entry->pos + entry->len));
    1208 ECB             : 
    1209 GIC        2244 :         if (val->weight && data)
    1210 CBC          24 :         {
    1211              24 :             WordEntryPos *posvec_iter = posvec->pos;
    1212 ECB             :             WordEntryPos *dptr;
    1213                 : 
    1214                 :             /*
    1215                 :              * Filter position information by weights
    1216                 :              */
    1217 GIC          24 :             dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
    1218 CBC          24 :             data->allocated = true;
    1219 ECB             : 
    1220                 :             /* Is there a position with a matching weight? */
    1221 GIC          48 :             while (posvec_iter < posvec->pos + posvec->npos)
    1222 ECB             :             {
    1223                 :                 /* If true, append this position to the data->pos */
    1224 GIC          24 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1225 ECB             :                 {
    1226 GIC          12 :                     *dptr = WEP_GETPOS(*posvec_iter);
    1227 CBC          12 :                     dptr++;
    1228 ECB             :                 }
    1229                 : 
    1230 GIC          24 :                 posvec_iter++;
    1231 ECB             :             }
    1232                 : 
    1233 GIC          24 :             data->npos = dptr - data->pos;
    1234 ECB             : 
    1235 GIC          24 :             if (data->npos > 0)
    1236 CBC          12 :                 result = TS_YES;
    1237 ECB             :             else
    1238                 :             {
    1239 GIC          12 :                 pfree(data->pos);
    1240 CBC          12 :                 data->pos = NULL;
    1241              12 :                 data->allocated = false;
    1242 ECB             :             }
    1243                 :         }
    1244 GIC        2220 :         else if (val->weight)
    1245 ECB             :         {
    1246 GIC         228 :             WordEntryPos *posvec_iter = posvec->pos;
    1247 ECB             : 
    1248                 :             /* Is there a position with a matching weight? */
    1249 GIC         345 :             while (posvec_iter < posvec->pos + posvec->npos)
    1250 ECB             :             {
    1251 GIC         252 :                 if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
    1252 ECB             :                 {
    1253 GIC         135 :                     result = TS_YES;
    1254 CBC         135 :                     break;      /* no need to go further */
    1255 ECB             :                 }
    1256                 : 
    1257 GIC         117 :                 posvec_iter++;
    1258 ECB             :             }
    1259                 :         }
    1260 GIC        1992 :         else if (data)
    1261 ECB             :         {
    1262 GIC        1137 :             data->npos = posvec->npos;
    1263 CBC        1137 :             data->pos = posvec->pos;
    1264            1137 :             data->allocated = false;
    1265            1137 :             result = TS_YES;
    1266 ECB             :         }
    1267                 :         else
    1268                 :         {
    1269                 :             /* simplest case: no weight check, positions not needed */
    1270 GIC         855 :             result = TS_YES;
    1271 ECB             :         }
    1272                 :     }
    1273                 :     else
    1274                 :     {
    1275                 :         /*
    1276                 :          * Position info is lacking, so if the caller requires it, we can only
    1277                 :          * say that maybe there is a match.
    1278                 :          *
    1279                 :          * Notice, however, that we *don't* check val->weight here.
    1280                 :          * Historically, stripped tsvectors are considered to match queries
    1281                 :          * whether or not the query has a weight restriction; that's a little
    1282                 :          * dubious but we'll preserve the behavior.
    1283                 :          */
    1284 GIC       31797 :         if (data)
    1285 CBC       11529 :             result = TS_MAYBE;
    1286 ECB             :         else
    1287 GIC       20268 :             result = TS_YES;
    1288 ECB             :     }
    1289                 : 
    1290 GIC       34041 :     return result;
    1291 ECB             : }
    1292                 : 
    1293                 : /*
    1294                 :  * TS_execute callback for matching a tsquery operand to plain tsvector data
    1295                 :  */
    1296                 : static TSTernaryValue
    1297 GIC      142011 : checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
    1298 ECB             : {
    1299 GIC      142011 :     CHKVAL     *chkval = (CHKVAL *) checkval;
    1300 CBC      142011 :     WordEntry  *StopLow = chkval->arrb;
    1301          142011 :     WordEntry  *StopHigh = chkval->arre;
    1302          142011 :     WordEntry  *StopMiddle = StopHigh;
    1303          142011 :     TSTernaryValue res = TS_NO;
    1304 ECB             : 
    1305                 :     /* Loop invariant: StopLow <= val < StopHigh */
    1306 GIC      893403 :     while (StopLow < StopHigh)
    1307 ECB             :     {
    1308                 :         int         difference;
    1309                 : 
    1310 GIC      777915 :         StopMiddle = StopLow + (StopHigh - StopLow) / 2;
    1311 CBC      777915 :         difference = tsCompareString(chkval->operand + val->distance,
    1312          777915 :                                      val->length,
    1313          777915 :                                      chkval->values + StopMiddle->pos,
    1314          777915 :                                      StopMiddle->len,
    1315 ECB             :                                      false);
    1316                 : 
    1317 GIC      777915 :         if (difference == 0)
    1318 ECB             :         {
    1319                 :             /* Check weight info & fill 'data' with positions */
    1320 GIC       26523 :             res = checkclass_str(chkval, StopMiddle, val, data);
    1321 CBC       26523 :             break;
    1322 ECB             :         }
    1323 GIC      751392 :         else if (difference > 0)
    1324 CBC      423756 :             StopLow = StopMiddle + 1;
    1325 ECB             :         else
    1326 GIC      327636 :             StopHigh = StopMiddle;
    1327 ECB             :     }
    1328                 : 
    1329                 :     /*
    1330                 :      * If it's a prefix search, we should also consider lexemes that the
    1331                 :      * search term is a prefix of (which will necessarily immediately follow
    1332                 :      * the place we found in the above loop).  But we can skip them if there
    1333                 :      * was a definite match on the exact term AND the caller doesn't need
    1334                 :      * position info.
    1335                 :      */
    1336 GIC      142011 :     if (val->prefix && (res != TS_YES || data))
    1337 ECB             :     {
    1338 GIC        8262 :         WordEntryPos *allpos = NULL;
    1339 CBC        8262 :         int         npos = 0,
    1340            8262 :                     totalpos = 0;
    1341 ECB             : 
    1342                 :         /* adjust start position for corner case */
    1343 GIC        8262 :         if (StopLow >= StopHigh)
    1344 CBC        8256 :             StopMiddle = StopHigh;
    1345 ECB             : 
    1346                 :         /* we don't try to re-use any data from the initial match */
    1347 GIC        8262 :         if (data)
    1348 ECB             :         {
    1349 GIC          18 :             if (data->allocated)
    1350 LBC           0 :                 pfree(data->pos);
    1351 GBC          18 :             data->pos = NULL;
    1352 CBC          18 :             data->allocated = false;
    1353              18 :             data->npos = 0;
    1354 ECB             :         }
    1355 GIC        8262 :         res = TS_NO;
    1356 ECB             : 
    1357 GIC       15729 :         while ((res != TS_YES || data) &&
    1358 CBC       23745 :                StopMiddle < chkval->arre &&
    1359            7965 :                tsCompareString(chkval->operand + val->distance,
    1360            7965 :                                val->length,
    1361            7965 :                                chkval->values + StopMiddle->pos,
    1362            7965 :                                StopMiddle->len,
    1363 ECB             :                                true) == 0)
    1364                 :         {
    1365                 :             TSTernaryValue subres;
    1366                 : 
    1367 GIC        7518 :             subres = checkclass_str(chkval, StopMiddle, val, data);
    1368 ECB             : 
    1369 GIC        7518 :             if (subres != TS_NO)
    1370 ECB             :             {
    1371 GIC        7488 :                 if (data)
    1372 ECB             :                 {
    1373                 :                     /*
    1374                 :                      * We need to join position information
    1375                 :                      */
    1376 GIC          21 :                     if (subres == TS_MAYBE)
    1377 ECB             :                     {
    1378                 :                         /*
    1379                 :                          * No position info for this match, so we must report
    1380                 :                          * MAYBE overall.
    1381                 :                          */
    1382 UIC           0 :                         res = TS_MAYBE;
    1383 EUB             :                         /* forget any previous positions */
    1384 UIC           0 :                         npos = 0;
    1385 EUB             :                         /* don't leak storage */
    1386 UIC           0 :                         if (allpos)
    1387 UBC           0 :                             pfree(allpos);
    1388               0 :                         break;
    1389 EUB             :                     }
    1390                 : 
    1391 GIC          39 :                     while (npos + data->npos > totalpos)
    1392 ECB             :                     {
    1393 GIC          18 :                         if (totalpos == 0)
    1394 ECB             :                         {
    1395 GIC          18 :                             totalpos = 256;
    1396 CBC          18 :                             allpos = palloc(sizeof(WordEntryPos) * totalpos);
    1397 ECB             :                         }
    1398                 :                         else
    1399                 :                         {
    1400 UIC           0 :                             totalpos *= 2;
    1401 UBC           0 :                             allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
    1402 EUB             :                         }
    1403                 :                     }
    1404                 : 
    1405 GIC          21 :                     memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
    1406 CBC          21 :                     npos += data->npos;
    1407 ECB             : 
    1408                 :                     /* don't leak storage from individual matches */
    1409 GIC          21 :                     if (data->allocated)
    1410 CBC          12 :                         pfree(data->pos);
    1411              21 :                     data->pos = NULL;
    1412              21 :                     data->allocated = false;
    1413 ECB             :                     /* it's important to reset data->npos before next loop */
    1414 GIC          21 :                     data->npos = 0;
    1415 ECB             :                 }
    1416                 :                 else
    1417                 :                 {
    1418                 :                     /* Don't need positions, just handle YES/MAYBE */
    1419 GIC        7467 :                     if (subres == TS_YES || res == TS_NO)
    1420 CBC        7467 :                         res = subres;
    1421 ECB             :                 }
    1422                 :             }
    1423                 : 
    1424 GIC        7518 :             StopMiddle++;
    1425 ECB             :         }
    1426                 : 
    1427 GIC        8262 :         if (data && npos > 0)
    1428 ECB             :         {
    1429                 :             /* Sort and make unique array of found positions */
    1430 GIC          18 :             data->pos = allpos;
    1431 CBC          18 :             qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
    1432              18 :             data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
    1433 ECB             :                                  compareWordEntryPos);
    1434 GIC          18 :             data->allocated = true;
    1435 CBC          18 :             res = TS_YES;
    1436 ECB             :         }
    1437                 :     }
    1438                 : 
    1439 GIC      142011 :     return res;
    1440 ECB             : }
    1441                 : 
    1442                 : /*
    1443                 :  * Compute output position list for a tsquery operator in phrase mode.
    1444                 :  *
    1445                 :  * Merge the position lists in Ldata and Rdata as specified by "emit",
    1446                 :  * returning the result list into *data.  The input position lists must be
    1447                 :  * sorted and unique, and the output will be as well.
    1448                 :  *
    1449                 :  * data: pointer to initially-all-zeroes output struct, or NULL
    1450                 :  * Ldata, Rdata: input position lists
    1451                 :  * emit: bitmask of TSPO_XXX flags
    1452                 :  * Loffset: offset to be added to Ldata positions before comparing/outputting
    1453                 :  * Roffset: offset to be added to Rdata positions before comparing/outputting
    1454                 :  * max_npos: maximum possible required size of output position array
    1455                 :  *
    1456                 :  * Loffset and Roffset should not be negative, else we risk trying to output
    1457                 :  * negative positions, which won't fit into WordEntryPos.
    1458                 :  *
    1459                 :  * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
    1460                 :  * we return it as TSTernaryValue.
    1461                 :  *
    1462                 :  * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
    1463                 :  * returns TS_YES if any positions would have been emitted.
    1464                 :  */
    1465                 : #define TSPO_L_ONLY     0x01    /* emit positions appearing only in L */
    1466                 : #define TSPO_R_ONLY     0x02    /* emit positions appearing only in R */
    1467                 : #define TSPO_BOTH       0x04    /* emit positions appearing in both L&R */
    1468                 : 
    1469                 : static TSTernaryValue
    1470 GIC       14982 : TS_phrase_output(ExecPhraseData *data,
    1471 ECB             :                  ExecPhraseData *Ldata,
    1472                 :                  ExecPhraseData *Rdata,
    1473                 :                  int emit,
    1474                 :                  int Loffset,
    1475                 :                  int Roffset,
    1476                 :                  int max_npos)
    1477                 : {
    1478                 :     int         Lindex,
    1479                 :                 Rindex;
    1480                 : 
    1481                 :     /* Loop until both inputs are exhausted */
    1482 GIC       14982 :     Lindex = Rindex = 0;
    1483 CBC       15498 :     while (Lindex < Ldata->npos || Rindex < Rdata->npos)
    1484 ECB             :     {
    1485                 :         int         Lpos,
    1486                 :                     Rpos;
    1487 GIC        1167 :         int         output_pos = 0;
    1488 ECB             : 
    1489                 :         /*
    1490                 :          * Fetch current values to compare.  WEP_GETPOS() is needed because
    1491                 :          * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
    1492                 :          */
    1493 GIC        1167 :         if (Lindex < Ldata->npos)
    1494 CBC         843 :             Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
    1495 ECB             :         else
    1496                 :         {
    1497                 :             /* L array exhausted, so we're done if R_ONLY isn't set */
    1498 GIC         324 :             if (!(emit & TSPO_R_ONLY))
    1499 CBC          75 :                 break;
    1500             249 :             Lpos = INT_MAX;
    1501 ECB             :         }
    1502 GIC        1092 :         if (Rindex < Rdata->npos)
    1503 CBC         969 :             Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
    1504 ECB             :         else
    1505                 :         {
    1506                 :             /* R array exhausted, so we're done if L_ONLY isn't set */
    1507 GIC         123 :             if (!(emit & TSPO_L_ONLY))
    1508 CBC          81 :                 break;
    1509              42 :             Rpos = INT_MAX;
    1510 ECB             :         }
    1511                 : 
    1512                 :         /* Merge-join the two input lists */
    1513 GIC        1011 :         if (Lpos < Rpos)
    1514 ECB             :         {
    1515                 :             /* Lpos is not matched in Rdata, should we output it? */
    1516 GIC         243 :             if (emit & TSPO_L_ONLY)
    1517 CBC          72 :                 output_pos = Lpos;
    1518             243 :             Lindex++;
    1519 ECB             :         }
    1520 GIC         768 :         else if (Lpos == Rpos)
    1521 ECB             :         {
    1522                 :             /* Lpos and Rpos match ... should we output it? */
    1523 GIC         399 :             if (emit & TSPO_BOTH)
    1524 CBC         351 :                 output_pos = Rpos;
    1525             399 :             Lindex++;
    1526             399 :             Rindex++;
    1527 ECB             :         }
    1528                 :         else                    /* Lpos > Rpos */
    1529                 :         {
    1530                 :             /* Rpos is not matched in Ldata, should we output it? */
    1531 GIC         369 :             if (emit & TSPO_R_ONLY)
    1532 CBC         270 :                 output_pos = Rpos;
    1533             369 :             Rindex++;
    1534 ECB             :         }
    1535                 : 
    1536 GIC        1011 :         if (output_pos > 0)
    1537 ECB             :         {
    1538 GIC         693 :             if (data)
    1539 ECB             :             {
    1540                 :                 /* Store position, first allocating output array if needed */
    1541 GIC         198 :                 if (data->pos == NULL)
    1542 ECB             :                 {
    1543 GIC         159 :                     data->pos = (WordEntryPos *)
    1544 CBC         159 :                         palloc(max_npos * sizeof(WordEntryPos));
    1545             159 :                     data->allocated = true;
    1546 ECB             :                 }
    1547 GIC         198 :                 data->pos[data->npos++] = output_pos;
    1548 ECB             :             }
    1549                 :             else
    1550                 :             {
    1551                 :                 /*
    1552                 :                  * Exact positions not needed, so return TS_YES as soon as we
    1553                 :                  * know there is at least one.
    1554                 :                  */
    1555 GIC         495 :                 return TS_YES;
    1556 ECB             :             }
    1557                 :         }
    1558                 :     }
    1559                 : 
    1560 GIC       14487 :     if (data && data->npos > 0)
    1561 ECB             :     {
    1562                 :         /* Let's assert we didn't overrun the array */
    1563 GIC         159 :         Assert(data->npos <= max_npos);
    1564 CBC         159 :         return TS_YES;
    1565 ECB             :     }
    1566 GIC       14328 :     return TS_NO;
    1567 ECB             : }
    1568                 : 
    1569                 : /*
    1570                 :  * Execute tsquery at or below an OP_PHRASE operator.
    1571                 :  *
    1572                 :  * This handles tsquery execution at recursion levels where we need to care
    1573                 :  * about match locations.
    1574                 :  *
    1575                 :  * In addition to the same arguments used for TS_execute, the caller may pass
    1576                 :  * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
    1577                 :  * match position info on success.  data == NULL if no position data need be
    1578                 :  * returned.
    1579                 :  * Note: the function assumes data != NULL for operators other than OP_PHRASE.
    1580                 :  * This is OK because an outside call always starts from an OP_PHRASE node,
    1581                 :  * and all internal recursion cases pass data != NULL.
    1582                 :  *
    1583                 :  * The detailed semantics of the match data, given that the function returned
    1584                 :  * TS_YES (successful match), are:
    1585                 :  *
    1586                 :  * npos > 0, negate = false:
    1587                 :  *   query is matched at specified position(s) (and only those positions)
    1588                 :  * npos > 0, negate = true:
    1589                 :  *   query is matched at all positions *except* specified position(s)
    1590                 :  * npos = 0, negate = true:
    1591                 :  *   query is matched at all positions
    1592                 :  * npos = 0, negate = false:
    1593                 :  *   disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
    1594                 :  *
    1595                 :  * Successful matches also return a "width" value which is the match width in
    1596                 :  * lexemes, less one.  Hence, "width" is zero for simple one-lexeme matches,
    1597                 :  * and is the sum of the phrase operator distances for phrase matches.  Note
    1598                 :  * that when width > 0, the listed positions represent the ends of matches not
    1599                 :  * the starts.  (This unintuitive rule is needed to avoid possibly generating
    1600                 :  * negative positions, which wouldn't fit into the WordEntryPos arrays.)
    1601                 :  *
    1602                 :  * If the TSExecuteCallback function reports that an operand is present
    1603                 :  * but fails to provide position(s) for it, we will return TS_MAYBE when
    1604                 :  * it is possible but not certain that the query is matched.
    1605                 :  *
    1606                 :  * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
    1607                 :  * negate = false (which is the state initialized by the caller); but the
    1608                 :  * "width" output in such cases is undefined.
    1609                 :  */
    1610                 : static TSTernaryValue
    1611 GIC      351271 : TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
    1612 ECB             :                   TSExecuteCallback chkcond,
    1613                 :                   ExecPhraseData *data)
    1614                 : {
    1615                 :     ExecPhraseData Ldata,
    1616                 :                 Rdata;
    1617                 :     TSTernaryValue lmatch,
    1618                 :                 rmatch;
    1619                 :     int         Loffset,
    1620                 :                 Roffset,
    1621                 :                 maxwidth;
    1622                 : 
    1623                 :     /* since this function recurses, it could be driven to stack overflow */
    1624 GIC      351271 :     check_stack_depth();
    1625 ECB             : 
    1626                 :     /* ... and let's check for query cancel while we're at it */
    1627 GIC      351271 :     CHECK_FOR_INTERRUPTS();
    1628 ECB             : 
    1629 GIC      351271 :     if (curitem->type == QI_VAL)
    1630 CBC      172801 :         return chkcond(arg, (QueryOperand *) curitem, data);
    1631 ECB             : 
    1632 GIC      178470 :     switch (curitem->qoperator.oper)
    1633 ECB             :     {
    1634 GIC       60471 :         case OP_NOT:
    1635 ECB             : 
    1636                 :             /*
    1637                 :              * We need not touch data->width, since a NOT operation does not
    1638                 :              * change the match width.
    1639                 :              */
    1640 GIC       60471 :             if (flags & TS_EXEC_SKIP_NOT)
    1641 ECB             :             {
    1642                 :                 /* with SKIP_NOT, report NOT as "match everywhere" */
    1643 UIC           0 :                 Assert(data->npos == 0 && !data->negate);
    1644 UBC           0 :                 data->negate = true;
    1645               0 :                 return TS_YES;
    1646 EUB             :             }
    1647 GIC       60471 :             switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
    1648 ECB             :             {
    1649 GIC       52853 :                 case TS_NO:
    1650 ECB             :                     /* change "match nowhere" to "match everywhere" */
    1651 GIC       52853 :                     Assert(data->npos == 0 && !data->negate);
    1652 CBC       52853 :                     data->negate = true;
    1653           52853 :                     return TS_YES;
    1654             195 :                 case TS_YES:
    1655             195 :                     if (data->npos > 0)
    1656 ECB             :                     {
    1657                 :                         /* we have some positions, invert negate flag */
    1658 GIC         192 :                         data->negate = !data->negate;
    1659 CBC         192 :                         return TS_YES;
    1660 ECB             :                     }
    1661 GIC           3 :                     else if (data->negate)
    1662 ECB             :                     {
    1663                 :                         /* change "match everywhere" to "match nowhere" */
    1664 GIC           3 :                         data->negate = false;
    1665 CBC           3 :                         return TS_NO;
    1666 ECB             :                     }
    1667                 :                     /* Should not get here if result was TS_YES */
    1668 UIC           0 :                     Assert(false);
    1669 EUB             :                     break;
    1670 GIC        7423 :                 case TS_MAYBE:
    1671 ECB             :                     /* match positions are, and remain, uncertain */
    1672 GIC        7423 :                     return TS_MAYBE;
    1673 ECB             :             }
    1674 UIC           0 :             break;
    1675 EUB             : 
    1676 GIC      117921 :         case OP_PHRASE:
    1677 ECB             :         case OP_AND:
    1678 GIC      117921 :             memset(&Ldata, 0, sizeof(Ldata));
    1679 CBC      117921 :             memset(&Rdata, 0, sizeof(Rdata));
    1680 ECB             : 
    1681 GIC      117921 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1682 ECB             :                                        arg, flags, chkcond, &Ldata);
    1683 GIC      117921 :             if (lmatch == TS_NO)
    1684 CBC       62993 :                 return TS_NO;
    1685 ECB             : 
    1686 GIC       54928 :             rmatch = TS_phrase_execute(curitem + 1,
    1687 ECB             :                                        arg, flags, chkcond, &Rdata);
    1688 GIC       54928 :             if (rmatch == TS_NO)
    1689 CBC       27110 :                 return TS_NO;
    1690 ECB             : 
    1691                 :             /*
    1692                 :              * If either operand has no position information, then we can't
    1693                 :              * return reliable position data, only a MAYBE result.
    1694                 :              */
    1695 GIC       27818 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1696 CBC       12914 :                 return TS_MAYBE;
    1697 ECB             : 
    1698 GIC       14904 :             if (curitem->qoperator.oper == OP_PHRASE)
    1699 ECB             :             {
    1700                 :                 /*
    1701                 :                  * Compute Loffset and Roffset suitable for phrase match, and
    1702                 :                  * compute overall width of whole phrase match.
    1703                 :                  */
    1704 GIC       14901 :                 Loffset = curitem->qoperator.distance + Rdata.width;
    1705 CBC       14901 :                 Roffset = 0;
    1706           14901 :                 if (data)
    1707              93 :                     data->width = curitem->qoperator.distance +
    1708              93 :                         Ldata.width + Rdata.width;
    1709 ECB             :             }
    1710                 :             else
    1711                 :             {
    1712                 :                 /*
    1713                 :                  * For OP_AND, set output width and alignment like OP_OR (see
    1714                 :                  * comment below)
    1715                 :                  */
    1716 GIC           3 :                 maxwidth = Max(Ldata.width, Rdata.width);
    1717 CBC           3 :                 Loffset = maxwidth - Ldata.width;
    1718               3 :                 Roffset = maxwidth - Rdata.width;
    1719               3 :                 if (data)
    1720               3 :                     data->width = maxwidth;
    1721 ECB             :             }
    1722                 : 
    1723 GIC       14904 :             if (Ldata.negate && Rdata.negate)
    1724 ECB             :             {
    1725                 :                 /* !L & !R: treat as !(L | R) */
    1726 GIC       14217 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1727 ECB             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1728                 :                                         Loffset, Roffset,
    1729 GIC       14217 :                                         Ldata.npos + Rdata.npos);
    1730 CBC       14217 :                 if (data)
    1731 LBC           0 :                     data->negate = true;
    1732 GBC       14217 :                 return TS_YES;
    1733 ECB             :             }
    1734 GIC         687 :             else if (Ldata.negate)
    1735 ECB             :             {
    1736                 :                 /* !L & R */
    1737 GIC         225 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1738 ECB             :                                         TSPO_R_ONLY,
    1739                 :                                         Loffset, Roffset,
    1740                 :                                         Rdata.npos);
    1741                 :             }
    1742 GIC         462 :             else if (Rdata.negate)
    1743 ECB             :             {
    1744                 :                 /* L & !R */
    1745 GIC           3 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1746 ECB             :                                         TSPO_L_ONLY,
    1747                 :                                         Loffset, Roffset,
    1748                 :                                         Ldata.npos);
    1749                 :             }
    1750                 :             else
    1751                 :             {
    1752                 :                 /* straight AND */
    1753 GIC         459 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1754 ECB             :                                         TSPO_BOTH,
    1755                 :                                         Loffset, Roffset,
    1756 GIC         459 :                                         Min(Ldata.npos, Rdata.npos));
    1757 ECB             :             }
    1758                 : 
    1759 GIC          78 :         case OP_OR:
    1760 CBC          78 :             memset(&Ldata, 0, sizeof(Ldata));
    1761              78 :             memset(&Rdata, 0, sizeof(Rdata));
    1762 ECB             : 
    1763 GIC          78 :             lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
    1764 ECB             :                                        arg, flags, chkcond, &Ldata);
    1765 GIC          78 :             rmatch = TS_phrase_execute(curitem + 1,
    1766 ECB             :                                        arg, flags, chkcond, &Rdata);
    1767                 : 
    1768 GIC          78 :             if (lmatch == TS_NO && rmatch == TS_NO)
    1769 CBC           6 :                 return TS_NO;
    1770 ECB             : 
    1771                 :             /*
    1772                 :              * If either operand has no position information, then we can't
    1773                 :              * return reliable position data, only a MAYBE result.
    1774                 :              */
    1775 GIC          72 :             if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
    1776 LBC           0 :                 return TS_MAYBE;
    1777 EUB             : 
    1778                 :             /*
    1779                 :              * Cope with undefined output width from failed submatch.  (This
    1780                 :              * takes less code than trying to ensure that all failure returns
    1781                 :              * set data->width to zero.)
    1782                 :              */
    1783 GIC          72 :             if (lmatch == TS_NO)
    1784 CBC           9 :                 Ldata.width = 0;
    1785              72 :             if (rmatch == TS_NO)
    1786              42 :                 Rdata.width = 0;
    1787 ECB             : 
    1788                 :             /*
    1789                 :              * For OP_AND and OP_OR, report the width of the wider of the two
    1790                 :              * inputs, and align the narrower input's positions to the right
    1791                 :              * end of that width.  This rule deals at least somewhat
    1792                 :              * reasonably with cases like "x <-> (y | z <-> q)".
    1793                 :              */
    1794 GIC          72 :             maxwidth = Max(Ldata.width, Rdata.width);
    1795 CBC          72 :             Loffset = maxwidth - Ldata.width;
    1796              72 :             Roffset = maxwidth - Rdata.width;
    1797              72 :             data->width = maxwidth;
    1798 ECB             : 
    1799 GIC          72 :             if (Ldata.negate && Rdata.negate)
    1800 ECB             :             {
    1801                 :                 /* !L | !R: treat as !(L & R) */
    1802 GIC           3 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1803 ECB             :                                         TSPO_BOTH,
    1804                 :                                         Loffset, Roffset,
    1805 GIC           3 :                                         Min(Ldata.npos, Rdata.npos));
    1806 CBC           3 :                 data->negate = true;
    1807               3 :                 return TS_YES;
    1808 ECB             :             }
    1809 GIC          69 :             else if (Ldata.negate)
    1810 ECB             :             {
    1811                 :                 /* !L | R: treat as !(L & !R) */
    1812 GIC          15 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1813 ECB             :                                         TSPO_L_ONLY,
    1814                 :                                         Loffset, Roffset,
    1815                 :                                         Ldata.npos);
    1816 GIC          15 :                 data->negate = true;
    1817 CBC          15 :                 return TS_YES;
    1818 ECB             :             }
    1819 GIC          54 :             else if (Rdata.negate)
    1820 ECB             :             {
    1821                 :                 /* L | !R: treat as !(!L & R) */
    1822 GIC           3 :                 (void) TS_phrase_output(data, &Ldata, &Rdata,
    1823 ECB             :                                         TSPO_R_ONLY,
    1824                 :                                         Loffset, Roffset,
    1825                 :                                         Rdata.npos);
    1826 GIC           3 :                 data->negate = true;
    1827 CBC           3 :                 return TS_YES;
    1828 ECB             :             }
    1829                 :             else
    1830                 :             {
    1831                 :                 /* straight OR */
    1832 GIC          51 :                 return TS_phrase_output(data, &Ldata, &Rdata,
    1833 ECB             :                                         TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    1834                 :                                         Loffset, Roffset,
    1835 GIC          51 :                                         Ldata.npos + Rdata.npos);
    1836 ECB             :             }
    1837                 : 
    1838 UIC           0 :         default:
    1839 UBC           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1840 EUB             :     }
    1841                 : 
    1842                 :     /* not reachable, but keep compiler quiet */
    1843 UIC           0 :     return TS_NO;
    1844 EUB             : }
    1845                 : 
    1846                 : 
    1847                 : /*
    1848                 :  * Evaluate tsquery boolean expression.
    1849                 :  *
    1850                 :  * curitem: current tsquery item (initially, the first one)
    1851                 :  * arg: opaque value to pass through to callback function
    1852                 :  * flags: bitmask of flag bits shown in ts_utils.h
    1853                 :  * chkcond: callback function to check whether a primitive value is present
    1854                 :  */
    1855                 : bool
    1856 GIC      259957 : TS_execute(QueryItem *curitem, void *arg, uint32 flags,
    1857 ECB             :            TSExecuteCallback chkcond)
    1858                 : {
    1859                 :     /*
    1860                 :      * If we get TS_MAYBE from the recursion, return true.  We could only see
    1861                 :      * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
    1862                 :      * need to check again.
    1863                 :      */
    1864 GIC      259957 :     return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
    1865 ECB             : }
    1866                 : 
    1867                 : /*
    1868                 :  * Evaluate tsquery boolean expression.
    1869                 :  *
    1870                 :  * This is the same as TS_execute except that TS_MAYBE is returned as-is.
    1871                 :  */
    1872                 : TSTernaryValue
    1873 GIC       18471 : TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
    1874 ECB             :                    TSExecuteCallback chkcond)
    1875                 : {
    1876 GIC       18471 :     return TS_execute_recurse(curitem, arg, flags, chkcond);
    1877 ECB             : }
    1878                 : 
    1879                 : /*
    1880                 :  * TS_execute recursion for operators above any phrase operator.  Here we do
    1881                 :  * not need to worry about lexeme positions.  As soon as we hit an OP_PHRASE
    1882                 :  * operator, we pass it off to TS_phrase_execute which does worry.
    1883                 :  */
    1884                 : static TSTernaryValue
    1885 GIC      527408 : TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
    1886 ECB             :                    TSExecuteCallback chkcond)
    1887                 : {
    1888                 :     TSTernaryValue lmatch;
    1889                 : 
    1890                 :     /* since this function recurses, it could be driven to stack overflow */
    1891 GIC      527408 :     check_stack_depth();
    1892 ECB             : 
    1893                 :     /* ... and let's check for query cancel while we're at it */
    1894 GIC      527408 :     CHECK_FOR_INTERRUPTS();
    1895 ECB             : 
    1896 GIC      527408 :     if (curitem->type == QI_VAL)
    1897 CBC      211662 :         return chkcond(arg, (QueryOperand *) curitem,
    1898 ECB             :                        NULL /* don't need position info */ );
    1899                 : 
    1900 GIC      315746 :     switch (curitem->qoperator.oper)
    1901 ECB             :     {
    1902 GIC      101628 :         case OP_NOT:
    1903 CBC      101628 :             if (flags & TS_EXEC_SKIP_NOT)
    1904 LBC           0 :                 return TS_YES;
    1905 GBC      101628 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1906 ECB             :             {
    1907 GIC       95879 :                 case TS_NO:
    1908 CBC       95879 :                     return TS_YES;
    1909            2445 :                 case TS_YES:
    1910            2445 :                     return TS_NO;
    1911            3304 :                 case TS_MAYBE:
    1912            3304 :                     return TS_MAYBE;
    1913 ECB             :             }
    1914 UIC           0 :             break;
    1915 EUB             : 
    1916 GIC       41870 :         case OP_AND:
    1917 CBC       41870 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1918 ECB             :                                         flags, chkcond);
    1919 GIC       41870 :             if (lmatch == TS_NO)
    1920 CBC       33264 :                 return TS_NO;
    1921            8606 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1922 ECB             :             {
    1923 GIC        5058 :                 case TS_NO:
    1924 CBC        5058 :                     return TS_NO;
    1925            1650 :                 case TS_YES:
    1926            1650 :                     return lmatch;
    1927            1898 :                 case TS_MAYBE:
    1928            1898 :                     return TS_MAYBE;
    1929 ECB             :             }
    1930 UIC           0 :             break;
    1931 EUB             : 
    1932 GIC       54483 :         case OP_OR:
    1933 CBC       54483 :             lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
    1934 ECB             :                                         flags, chkcond);
    1935 GIC       54483 :             if (lmatch == TS_YES)
    1936 CBC       12090 :                 return TS_YES;
    1937           42393 :             switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
    1938 ECB             :             {
    1939 GIC       28753 :                 case TS_NO:
    1940 CBC       28753 :                     return lmatch;
    1941            3708 :                 case TS_YES:
    1942            3708 :                     return TS_YES;
    1943            9932 :                 case TS_MAYBE:
    1944            9932 :                     return TS_MAYBE;
    1945 ECB             :             }
    1946 UIC           0 :             break;
    1947 EUB             : 
    1948 GIC      117765 :         case OP_PHRASE:
    1949 ECB             : 
    1950                 :             /*
    1951                 :              * If we get a MAYBE result, and the caller doesn't want that,
    1952                 :              * convert it to NO.  It would be more consistent, perhaps, to
    1953                 :              * return the result of TS_phrase_execute() verbatim and then
    1954                 :              * convert MAYBE results at the top of the recursion.  But
    1955                 :              * converting at the topmost phrase operator gives results that
    1956                 :              * are bug-compatible with the old implementation, so do it like
    1957                 :              * this for now.
    1958                 :              */
    1959 GIC      117765 :             switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
    1960 ECB             :             {
    1961 GIC       90199 :                 case TS_NO:
    1962 CBC       90199 :                     return TS_NO;
    1963           14655 :                 case TS_YES:
    1964           14655 :                     return TS_YES;
    1965           12911 :                 case TS_MAYBE:
    1966           12911 :                     return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
    1967 ECB             :             }
    1968 UIC           0 :             break;
    1969 EUB             : 
    1970 UIC           0 :         default:
    1971 UBC           0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    1972 EUB             :     }
    1973                 : 
    1974                 :     /* not reachable, but keep compiler quiet */
    1975 UIC           0 :     return TS_NO;
    1976 EUB             : }
    1977                 : 
    1978                 : /*
    1979                 :  * Evaluate tsquery and report locations of matching terms.
    1980                 :  *
    1981                 :  * This is like TS_execute except that it returns match locations not just
    1982                 :  * success/failure status.  The callback function is required to provide
    1983                 :  * position data (we report failure if it doesn't).
    1984                 :  *
    1985                 :  * On successful match, the result is a List of ExecPhraseData structs, one
    1986                 :  * for each AND'ed term or phrase operator in the query.  Each struct includes
    1987                 :  * a sorted array of lexeme positions matching that term.  (Recall that for
    1988                 :  * phrase operators, the match includes width+1 lexemes, and the recorded
    1989                 :  * position is that of the rightmost lexeme.)
    1990                 :  *
    1991                 :  * OR subexpressions are handled by union'ing their match locations into a
    1992                 :  * single List element, which is valid since any of those locations contains
    1993                 :  * a match.  However, when some of the OR'ed terms are phrase operators, we
    1994                 :  * report the maximum width of any of the OR'ed terms, making such cases
    1995                 :  * slightly imprecise in the conservative direction.  (For example, if the
    1996                 :  * tsquery is "(A <-> B) | C", an occurrence of C in the data would be
    1997                 :  * reported as though it includes the lexeme to the left of C.)
    1998                 :  *
    1999                 :  * Locations of NOT subexpressions are not reported.  (Obviously, there can
    2000                 :  * be no successful NOT matches at top level, or the match would have failed.
    2001                 :  * So this amounts to ignoring NOTs underneath ORs.)
    2002                 :  *
    2003                 :  * The result is NIL if no match, or if position data was not returned.
    2004                 :  *
    2005                 :  * Arguments are the same as for TS_execute, although flags is currently
    2006                 :  * vestigial since none of the defined bits are sensible here.
    2007                 :  */
    2008                 : List *
    2009 GNC         181 : TS_execute_locations(QueryItem *curitem, void *arg,
    2010                 :                      uint32 flags,
    2011                 :                      TSExecuteCallback chkcond)
    2012                 : {
    2013                 :     List       *result;
    2014                 : 
    2015                 :     /* No flags supported, as yet */
    2016             181 :     Assert(flags == TS_EXEC_EMPTY);
    2017             181 :     if (TS_execute_locations_recurse(curitem, arg, chkcond, &result))
    2018              64 :         return result;
    2019             117 :     return NIL;
    2020                 : }
    2021                 : 
    2022                 : /*
    2023                 :  * TS_execute_locations recursion for operators above any phrase operator.
    2024                 :  * OP_PHRASE subexpressions can be passed off to TS_phrase_execute.
    2025                 :  */
    2026                 : static bool
    2027             535 : TS_execute_locations_recurse(QueryItem *curitem, void *arg,
    2028                 :                              TSExecuteCallback chkcond,
    2029                 :                              List **locations)
    2030                 : {
    2031                 :     bool        lmatch,
    2032                 :                 rmatch;
    2033                 :     List       *llocations,
    2034                 :                *rlocations;
    2035                 :     ExecPhraseData *data;
    2036                 : 
    2037                 :     /* since this function recurses, it could be driven to stack overflow */
    2038             535 :     check_stack_depth();
    2039                 : 
    2040                 :     /* ... and let's check for query cancel while we're at it */
    2041             535 :     CHECK_FOR_INTERRUPTS();
    2042                 : 
    2043                 :     /* Default locations result is empty */
    2044             535 :     *locations = NIL;
    2045                 : 
    2046             535 :     if (curitem->type == QI_VAL)
    2047                 :     {
    2048             223 :         data = palloc0_object(ExecPhraseData);
    2049             223 :         if (chkcond(arg, (QueryOperand *) curitem, data) == TS_YES)
    2050                 :         {
    2051             106 :             *locations = list_make1(data);
    2052             106 :             return true;
    2053                 :         }
    2054             117 :         pfree(data);
    2055             117 :         return false;
    2056                 :     }
    2057                 : 
    2058             312 :     switch (curitem->qoperator.oper)
    2059                 :     {
    2060               6 :         case OP_NOT:
    2061               6 :             if (!TS_execute_locations_recurse(curitem + 1, arg, chkcond,
    2062                 :                                               &llocations))
    2063 UNC           0 :                 return true;    /* we don't pass back any locations */
    2064 GNC           6 :             return false;
    2065                 : 
    2066             264 :         case OP_AND:
    2067             264 :             if (!TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2068                 :                                               arg, chkcond,
    2069                 :                                               &llocations))
    2070             204 :                 return false;
    2071              60 :             if (!TS_execute_locations_recurse(curitem + 1,
    2072                 :                                               arg, chkcond,
    2073                 :                                               &rlocations))
    2074              27 :                 return false;
    2075              33 :             *locations = list_concat(llocations, rlocations);
    2076              33 :             return true;
    2077                 : 
    2078              12 :         case OP_OR:
    2079              12 :             lmatch = TS_execute_locations_recurse(curitem + curitem->qoperator.left,
    2080                 :                                                   arg, chkcond,
    2081                 :                                                   &llocations);
    2082              12 :             rmatch = TS_execute_locations_recurse(curitem + 1,
    2083                 :                                                   arg, chkcond,
    2084                 :                                                   &rlocations);
    2085              12 :             if (lmatch || rmatch)
    2086                 :             {
    2087                 :                 /*
    2088                 :                  * We generate an AND'able location struct from each
    2089                 :                  * combination of sub-matches, following the disjunctive law
    2090                 :                  * (A & B) | (C & D) = (A | C) & (A | D) & (B | C) & (B | D).
    2091                 :                  *
    2092                 :                  * However, if either input didn't produce locations (i.e., it
    2093                 :                  * failed or was a NOT), we must just return the other list.
    2094                 :                  */
    2095              12 :                 if (llocations == NIL)
    2096 UNC           0 :                     *locations = rlocations;
    2097 GNC          12 :                 else if (rlocations == NIL)
    2098               6 :                     *locations = llocations;
    2099                 :                 else
    2100                 :                 {
    2101                 :                     ListCell   *ll;
    2102                 : 
    2103              12 :                     foreach(ll, llocations)
    2104                 :                     {
    2105               6 :                         ExecPhraseData *ldata = (ExecPhraseData *) lfirst(ll);
    2106                 :                         ListCell   *lr;
    2107                 : 
    2108              12 :                         foreach(lr, rlocations)
    2109                 :                         {
    2110               6 :                             ExecPhraseData *rdata = (ExecPhraseData *) lfirst(lr);
    2111                 : 
    2112               6 :                             data = palloc0_object(ExecPhraseData);
    2113               6 :                             (void) TS_phrase_output(data, ldata, rdata,
    2114                 :                                                     TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
    2115                 :                                                     0, 0,
    2116               6 :                                                     ldata->npos + rdata->npos);
    2117                 :                             /* Report the larger width, as explained above. */
    2118               6 :                             data->width = Max(ldata->width, rdata->width);
    2119               6 :                             *locations = lappend(*locations, data);
    2120                 :                         }
    2121                 :                     }
    2122                 :                 }
    2123                 : 
    2124              12 :                 return true;
    2125                 :             }
    2126 UNC           0 :             return false;
    2127                 : 
    2128 GNC          30 :         case OP_PHRASE:
    2129                 :             /* We can hand this off to TS_phrase_execute */
    2130              30 :             data = palloc0_object(ExecPhraseData);
    2131              30 :             if (TS_phrase_execute(curitem, arg, TS_EXEC_EMPTY, chkcond,
    2132                 :                                   data) == TS_YES)
    2133                 :             {
    2134              30 :                 if (!data->negate)
    2135              30 :                     *locations = list_make1(data);
    2136              30 :                 return true;
    2137                 :             }
    2138 UNC           0 :             pfree(data);
    2139               0 :             return false;
    2140                 : 
    2141               0 :         default:
    2142               0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2143                 :     }
    2144                 : 
    2145                 :     /* not reachable, but keep compiler quiet */
    2146                 :     return false;
    2147                 : }
    2148                 : 
    2149                 : /*
    2150                 :  * Detect whether a tsquery boolean expression requires any positive matches
    2151                 :  * to values shown in the tsquery.
    2152                 :  *
    2153                 :  * This is needed to know whether a GIN index search requires full index scan.
    2154                 :  * For example, 'x & !y' requires a match of x, so it's sufficient to scan
    2155                 :  * entries for x; but 'x | !y' could match rows containing neither x nor y.
    2156                 :  */
    2157                 : bool
    2158 GIC         417 : tsquery_requires_match(QueryItem *curitem)
    2159                 : {
    2160                 :     /* since this function recurses, it could be driven to stack overflow */
    2161             417 :     check_stack_depth();
    2162                 : 
    2163             417 :     if (curitem->type == QI_VAL)
    2164             198 :         return true;
    2165                 : 
    2166             219 :     switch (curitem->qoperator.oper)
    2167                 :     {
    2168              84 :         case OP_NOT:
    2169                 : 
    2170                 :             /*
    2171                 :              * Assume there are no required matches underneath a NOT.  For
    2172                 :              * some cases with nested NOTs, we could prove there's a required
    2173                 :              * match, but it seems unlikely to be worth the trouble.
    2174                 :              */
    2175              84 :             return false;
    2176                 : 
    2177             102 :         case OP_PHRASE:
    2178                 : 
    2179                 :             /*
    2180                 :              * Treat OP_PHRASE as OP_AND here
    2181 ECB             :              */
    2182                 :         case OP_AND:
    2183                 :             /* If either side requires a match, we're good */
    2184 GIC         102 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2185              78 :                 return true;
    2186                 :             else
    2187              24 :                 return tsquery_requires_match(curitem + 1);
    2188 ECB             : 
    2189 CBC          33 :         case OP_OR:
    2190 ECB             :             /* Both sides must require a match */
    2191 CBC          33 :             if (tsquery_requires_match(curitem + curitem->qoperator.left))
    2192 GIC          33 :                 return tsquery_requires_match(curitem + 1);
    2193                 :             else
    2194 UIC           0 :                 return false;
    2195                 : 
    2196               0 :         default:
    2197               0 :             elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
    2198                 :     }
    2199 ECB             : 
    2200                 :     /* not reachable, but keep compiler quiet */
    2201                 :     return false;
    2202                 : }
    2203                 : 
    2204                 : /*
    2205                 :  * boolean operations
    2206                 :  */
    2207                 : Datum
    2208 GIC          30 : ts_match_qv(PG_FUNCTION_ARGS)
    2209                 : {
    2210 CBC          30 :     PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
    2211                 :                                         PG_GETARG_DATUM(1),
    2212                 :                                         PG_GETARG_DATUM(0)));
    2213 ECB             : }
    2214                 : 
    2215                 : Datum
    2216 CBC      110040 : ts_match_vq(PG_FUNCTION_ARGS)
    2217                 : {
    2218          110040 :     TSVector    val = PG_GETARG_TSVECTOR(0);
    2219 GIC      110040 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2220 ECB             :     CHKVAL      chkval;
    2221                 :     bool        result;
    2222                 : 
    2223                 :     /* empty query matches nothing */
    2224 CBC      110040 :     if (!query->size)
    2225                 :     {
    2226 LBC           0 :         PG_FREE_IF_COPY(val, 0);
    2227               0 :         PG_FREE_IF_COPY(query, 1);
    2228 UIC           0 :         PG_RETURN_BOOL(false);
    2229                 :     }
    2230 ECB             : 
    2231 GIC      110040 :     chkval.arrb = ARRPTR(val);
    2232 CBC      110040 :     chkval.arre = chkval.arrb + val->size;
    2233          110040 :     chkval.values = STRPTR(val);
    2234 GIC      110040 :     chkval.operand = GETOPERAND(query);
    2235 GBC      110040 :     result = TS_execute(GETQUERY(query),
    2236 ECB             :                         &chkval,
    2237                 :                         TS_EXEC_EMPTY,
    2238                 :                         checkcondition_str);
    2239                 : 
    2240 GIC      110040 :     PG_FREE_IF_COPY(val, 0);
    2241          110040 :     PG_FREE_IF_COPY(query, 1);
    2242 CBC      110040 :     PG_RETURN_BOOL(result);
    2243 ECB             : }
    2244                 : 
    2245                 : Datum
    2246 LBC           0 : ts_match_tt(PG_FUNCTION_ARGS)
    2247 ECB             : {
    2248                 :     TSVector    vector;
    2249                 :     TSQuery     query;
    2250                 :     bool        res;
    2251                 : 
    2252 UIC           0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2253                 :                                                   PG_GETARG_DATUM(0)));
    2254 LBC           0 :     query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
    2255                 :                                                 PG_GETARG_DATUM(1)));
    2256                 : 
    2257               0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2258                 :                                            TSVectorGetDatum(vector),
    2259                 :                                            TSQueryGetDatum(query)));
    2260                 : 
    2261 UIC           0 :     pfree(vector);
    2262               0 :     pfree(query);
    2263                 : 
    2264               0 :     PG_RETURN_BOOL(res);
    2265                 : }
    2266                 : 
    2267 ECB             : Datum
    2268 UBC           0 : ts_match_tq(PG_FUNCTION_ARGS)
    2269 ECB             : {
    2270                 :     TSVector    vector;
    2271 UIC           0 :     TSQuery     query = PG_GETARG_TSQUERY(1);
    2272                 :     bool        res;
    2273                 : 
    2274               0 :     vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
    2275 ECB             :                                                   PG_GETARG_DATUM(0)));
    2276                 : 
    2277 LBC           0 :     res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
    2278                 :                                            TSVectorGetDatum(vector),
    2279                 :                                            TSQueryGetDatum(query)));
    2280 ECB             : 
    2281 UIC           0 :     pfree(vector);
    2282 LBC           0 :     PG_FREE_IF_COPY(query, 1);
    2283                 : 
    2284               0 :     PG_RETURN_BOOL(res);
    2285 ECB             : }
    2286                 : 
    2287                 : /*
    2288                 :  * ts_stat statistic function support
    2289                 :  */
    2290                 : 
    2291                 : 
    2292                 : /*
    2293                 :  * Returns the number of positions in value 'wptr' within tsvector 'txt',
    2294                 :  * that have a weight equal to one of the weights in 'weight' bitmask.
    2295                 :  */
    2296                 : static int
    2297 GIC        4089 : check_weight(TSVector txt, WordEntry *wptr, int8 weight)
    2298 EUB             : {
    2299 GIC        4089 :     int         len = POSDATALEN(txt, wptr);
    2300 CBC        4089 :     int         num = 0;
    2301 GIC        4089 :     WordEntryPos *ptr = POSDATAPTR(txt, wptr);
    2302 ECB             : 
    2303 CBC        8325 :     while (len--)
    2304                 :     {
    2305 GIC        4236 :         if (weight & (1 << WEP_GETWEIGHT(*ptr)))
    2306 CBC           6 :             num++;
    2307            4236 :         ptr++;
    2308 ECB             :     }
    2309 GIC        4089 :     return num;
    2310 EUB             : }
    2311                 : 
    2312                 : #define compareStatWord(a,e,t)                          \
    2313                 :     tsCompareString((a)->lexeme, (a)->lenlexeme,      \
    2314                 :                     STRPTR(t) + (e)->pos, (e)->len,       \
    2315                 :                     false)
    2316                 : 
    2317                 : static void
    2318 GIC      172812 : insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
    2319                 : {
    2320          172812 :     WordEntry  *we = ARRPTR(txt) + off;
    2321          172812 :     StatEntry  *node = stat->root,
    2322          172812 :                *pnode = NULL;
    2323                 :     int         n,
    2324          172812 :                 res = 0;
    2325          172812 :     uint32      depth = 1;
    2326                 : 
    2327          172812 :     if (stat->weight == 0)
    2328           86406 :         n = (we->haspos) ? POSDATALEN(txt, we) : 1;
    2329                 :     else
    2330 CBC       86406 :         n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
    2331                 : 
    2332 GIC      172812 :     if (n == 0)
    2333 CBC       86403 :         return;                 /* nothing to insert */
    2334                 : 
    2335          872691 :     while (node)
    2336 ECB             :     {
    2337 GIC      869259 :         res = compareStatWord(node, we, txt);
    2338 ECB             : 
    2339 GIC      869259 :         if (res == 0)
    2340 ECB             :         {
    2341 GIC       82977 :             break;
    2342                 :         }
    2343                 :         else
    2344                 :         {
    2345          786282 :             pnode = node;
    2346          786282 :             node = (res < 0) ? node->left : node->right;
    2347 ECB             :         }
    2348 GIC      786282 :         depth++;
    2349 ECB             :     }
    2350                 : 
    2351 GIC       86409 :     if (depth > stat->maxdepth)
    2352              63 :         stat->maxdepth = depth;
    2353                 : 
    2354           86409 :     if (node == NULL)
    2355                 :     {
    2356 CBC        3432 :         node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
    2357            3432 :         node->left = node->right = NULL;
    2358 GIC        3432 :         node->ndoc = 1;
    2359 CBC        3432 :         node->nentry = n;
    2360 GIC        3432 :         node->lenlexeme = we->len;
    2361 CBC        3432 :         memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
    2362                 : 
    2363            3432 :         if (pnode == NULL)
    2364 ECB             :         {
    2365 GIC           6 :             stat->root = node;
    2366 EUB             :         }
    2367                 :         else
    2368                 :         {
    2369 GBC        3426 :             if (res < 0)
    2370 GIC        1690 :                 pnode->left = node;
    2371                 :             else
    2372            1736 :                 pnode->right = node;
    2373                 :         }
    2374                 :     }
    2375                 :     else
    2376                 :     {
    2377           82977 :         node->ndoc++;
    2378           82977 :         node->nentry += n;
    2379                 :     }
    2380 ECB             : }
    2381                 : 
    2382                 : static void
    2383 GIC      247692 : chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
    2384                 :                     uint32 low, uint32 high, uint32 offset)
    2385                 : {
    2386                 :     uint32      pos;
    2387          247692 :     uint32      middle = (low + high) >> 1;
    2388 ECB             : 
    2389 GIC      247692 :     pos = (low + middle) >> 1;
    2390 CBC      247692 :     if (low != middle && pos >= offset && pos - offset < txt->size)
    2391           85164 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2392 GIC      247692 :     pos = (high + middle + 1) >> 1;
    2393          247692 :     if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
    2394           84642 :         insertStatEntry(persistentContext, stat, txt, pos - offset);
    2395                 : 
    2396 CBC      247692 :     if (low != middle)
    2397 GIC      123846 :         chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
    2398 GBC      247692 :     if (high != middle + 1)
    2399          120840 :         chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
    2400          247692 : }
    2401                 : 
    2402                 : /*
    2403 ECB             :  * This is written like a custom aggregate function, because the
    2404                 :  * original plan was to do just that. Unfortunately, an aggregate function
    2405                 :  * can't return a set, so that plan was abandoned. If that limitation is
    2406                 :  * lifted in the future, ts_stat could be a real aggregate function so that
    2407                 :  * you could use it like this:
    2408                 :  *
    2409                 :  *   SELECT ts_stat(vector_column) FROM vector_table;
    2410                 :  *
    2411                 :  *  where vector_column is a tsvector-type column in vector_table.
    2412                 :  */
    2413                 : 
    2414                 : static TSVectorStat *
    2415 GIC        3054 : ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
    2416                 : {
    2417            3054 :     TSVector    txt = DatumGetTSVector(data);
    2418 EUB             :     uint32      i,
    2419 GIC        3054 :                 nbit = 0,
    2420                 :                 offset;
    2421                 : 
    2422            3054 :     if (stat == NULL)
    2423                 :     {                           /* initialize on first call */
    2424 UBC           0 :         stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2425 UIC           0 :         stat->maxdepth = 1;
    2426 EUB             :     }
    2427                 : 
    2428                 :     /* simple check of correctness */
    2429 GBC        3054 :     if (txt == NULL || txt->size == 0)
    2430                 :     {
    2431 GIC          48 :         if (txt && txt != (TSVector) DatumGetPointer(data))
    2432              48 :             pfree(txt);
    2433 GBC          48 :         return stat;
    2434 EUB             :     }
    2435                 : 
    2436 GBC        3006 :     i = txt->size - 1;
    2437 GIC       21360 :     for (; i > 0; i >>= 1)
    2438           18354 :         nbit++;
    2439                 : 
    2440 GBC        3006 :     nbit = 1 << nbit;
    2441 GIC        3006 :     offset = (nbit - txt->size) / 2;
    2442                 : 
    2443 GBC        3006 :     insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
    2444 GIC        3006 :     chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
    2445                 : 
    2446 GBC        3006 :     return stat;
    2447                 : }
    2448                 : 
    2449 EUB             : static void
    2450 GIC           6 : ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
    2451                 :                    TSVectorStat *stat)
    2452                 : {
    2453 EUB             :     TupleDesc   tupdesc;
    2454                 :     MemoryContext oldcontext;
    2455                 :     StatEntry  *node;
    2456                 : 
    2457 GIC           6 :     funcctx->user_fctx = (void *) stat;
    2458                 : 
    2459               6 :     oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    2460                 : 
    2461               6 :     stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
    2462               6 :     stat->stackpos = 0;
    2463                 : 
    2464               6 :     node = stat->root;
    2465                 :     /* find leftmost value */
    2466               6 :     if (node == NULL)
    2467 UIC           0 :         stat->stack[stat->stackpos] = NULL;
    2468                 :     else
    2469 ECB             :         for (;;)
    2470                 :         {
    2471 CBC          24 :             stat->stack[stat->stackpos] = node;
    2472              24 :             if (node->left)
    2473 ECB             :             {
    2474 GIC          18 :                 stat->stackpos++;
    2475 CBC          18 :                 node = node->left;
    2476                 :             }
    2477 ECB             :             else
    2478 CBC           6 :                 break;
    2479 ECB             :         }
    2480 GIC           6 :     Assert(stat->stackpos <= stat->maxdepth);
    2481 ECB             : 
    2482 GNC           6 :     if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
    2483 UNC           0 :         elog(ERROR, "return type must be a row type");
    2484 GNC           6 :     funcctx->tuple_desc = tupdesc;
    2485 CBC           6 :     funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
    2486                 : 
    2487               6 :     MemoryContextSwitchTo(oldcontext);
    2488               6 : }
    2489 ECB             : 
    2490                 : static StatEntry *
    2491 CBC        6864 : walkStatEntryTree(TSVectorStat *stat)
    2492 ECB             : {
    2493 GIC        6864 :     StatEntry  *node = stat->stack[stat->stackpos];
    2494 ECB             : 
    2495 CBC        6864 :     if (node == NULL)
    2496 UIC           0 :         return NULL;
    2497 ECB             : 
    2498 GIC        6864 :     if (node->ndoc != 0)
    2499 ECB             :     {
    2500                 :         /* return the entry itself: we have already visited its left sublink */
    2501 GIC        1696 :         return node;
    2502 ECB             :     }
    2503 GIC        5168 :     else if (node->right && node->right != stat->stack[stat->stackpos + 1])
    2504 ECB             :     {
    2505                 :         /* go on right sublink */
    2506 CBC        1736 :         stat->stackpos++;
    2507 GIC        1736 :         node = node->right;
    2508 ECB             : 
    2509                 :         /* find leftmost value */
    2510                 :         for (;;)
    2511                 :         {
    2512 CBC        3408 :             stat->stack[stat->stackpos] = node;
    2513            3408 :             if (node->left)
    2514                 :             {
    2515            1672 :                 stat->stackpos++;
    2516 GIC        1672 :                 node = node->left;
    2517                 :             }
    2518 ECB             :             else
    2519 CBC        1736 :                 break;
    2520                 :         }
    2521            1736 :         Assert(stat->stackpos <= stat->maxdepth);
    2522                 :     }
    2523 ECB             :     else
    2524                 :     {
    2525                 :         /* we have already returned the left subtree, the node itself, and the right subtree */
    2526 CBC        3432 :         if (stat->stackpos == 0)
    2527               6 :             return NULL;
    2528 ECB             : 
    2529 GIC        3426 :         stat->stackpos--;
    2530 CBC        3426 :         return walkStatEntryTree(stat);
    2531                 :     }
    2532 ECB             : 
    2533 GIC        1736 :     return node;
    2534                 : }
    2535                 : 
    2536 ECB             : static Datum
    2537 CBC        3438 : ts_process_call(FuncCallContext *funcctx)
    2538                 : {
    2539 ECB             :     TSVectorStat *st;
    2540                 :     StatEntry  *entry;
    2541                 : 
    2542 GIC        3438 :     st = (TSVectorStat *) funcctx->user_fctx;
    2543                 : 
    2544 CBC        3438 :     entry = walkStatEntryTree(st);
    2545 ECB             : 
    2546 GIC        3438 :     if (entry != NULL)
    2547                 :     {
    2548                 :         Datum       result;
    2549                 :         char       *values[3];
    2550 ECB             :         char        ndoc[16];
    2551                 :         char        nentry[16];
    2552                 :         HeapTuple   tuple;
    2553                 : 
    2554 CBC        3432 :         values[0] = palloc(entry->lenlexeme + 1);
    2555 GIC        3432 :         memcpy(values[0], entry->lexeme, entry->lenlexeme);
    2556 CBC        3432 :         (values[0])[entry->lenlexeme] = '\0';
    2557            3432 :         sprintf(ndoc, "%d", entry->ndoc);
    2558            3432 :         values[1] = ndoc;
    2559            3432 :         sprintf(nentry, "%d", entry->nentry);
    2560            3432 :         values[2] = nentry;
    2561 ECB             : 
    2562 GIC        3432 :         tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
    2563 CBC        3432 :         result = HeapTupleGetDatum(tuple);
    2564 ECB             : 
    2565 CBC        3432 :         pfree(values[0]);
    2566 ECB             : 
    2567                 :         /* mark entry as already visited */
    2568 GIC        3432 :         entry->ndoc = 0;
    2569                 : 
    2570            3432 :         return result;
    2571                 :     }
    2572                 : 
    2573               6 :     return (Datum) 0;
    2574                 : }
    2575                 : 
    2576                 : static TSVectorStat *
    2577               6 : ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
    2578                 : {
    2579               6 :     char       *query = text_to_cstring(txt);
    2580                 :     TSVectorStat *stat;
    2581                 :     bool        isnull;
    2582 ECB             :     Portal      portal;
    2583                 :     SPIPlanPtr  plan;
    2584                 : 
    2585 GIC           6 :     if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
    2586 ECB             :         /* internal error */
    2587 UIC           0 :         elog(ERROR, "SPI_prepare(\"%s\") failed", query);
    2588                 : 
    2589 CBC           6 :     if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
    2590                 :         /* internal error */
    2591 UBC           0 :         elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
    2592 EUB             : 
    2593 GIC           6 :     SPI_cursor_fetch(portal, true, 100);
    2594                 : 
    2595               6 :     if (SPI_tuptable == NULL ||
    2596 CBC           6 :         SPI_tuptable->tupdesc->natts != 1 ||
    2597 GIC           6 :         !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
    2598 ECB             :                            TSVECTOROID))
    2599 LBC           0 :         ereport(ERROR,
    2600 ECB             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2601                 :                  errmsg("ts_stat query must return one tsvector column")));
    2602                 : 
    2603 CBC           6 :     stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
    2604               6 :     stat->maxdepth = 1;
    2605 ECB             : 
    2606 GIC           6 :     if (ws)
    2607 ECB             :     {
    2608                 :         char       *buf;
    2609                 : 
    2610 CBC           3 :         buf = VARDATA_ANY(ws);
    2611               9 :         while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
    2612                 :         {
    2613               6 :             if (pg_mblen(buf) == 1)
    2614                 :             {
    2615 GIC           6 :                 switch (*buf)
    2616                 :                 {
    2617 CBC           3 :                     case 'A':
    2618                 :                     case 'a':
    2619 GIC           3 :                         stat->weight |= 1 << 3;
    2620               3 :                         break;
    2621               3 :                     case 'B':
    2622                 :                     case 'b':
    2623               3 :                         stat->weight |= 1 << 2;
    2624 CBC           3 :                         break;
    2625 UIC           0 :                     case 'C':
    2626 ECB             :                     case 'c':
    2627 UIC           0 :                         stat->weight |= 1 << 1;
    2628 LBC           0 :                         break;
    2629               0 :                     case 'D':
    2630                 :                     case 'd':
    2631               0 :                         stat->weight |= 1;
    2632 UIC           0 :                         break;
    2633 LBC           0 :                     default:
    2634 UBC           0 :                         stat->weight |= 0;
    2635                 :                 }
    2636                 :             }
    2637 GIC           6 :             buf += pg_mblen(buf);
    2638 ECB             :         }
    2639                 :     }
    2640                 : 
    2641 CBC          42 :     while (SPI_processed > 0)
    2642 ECB             :     {
    2643                 :         uint64      i;
    2644                 : 
    2645 CBC        3090 :         for (i = 0; i < SPI_processed; i++)
    2646                 :         {
    2647            3054 :             Datum       data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
    2648                 : 
    2649            3054 :             if (!isnull)
    2650 GBC        3054 :                 stat = ts_accum(persistentContext, stat, data);
    2651 ECB             :         }
    2652                 : 
    2653 GIC          36 :         SPI_freetuptable(SPI_tuptable);
    2654 CBC          36 :         SPI_cursor_fetch(portal, true, 100);
    2655 ECB             :     }
    2656                 : 
    2657 GIC           6 :     SPI_freetuptable(SPI_tuptable);
    2658 CBC           6 :     SPI_cursor_close(portal);
    2659 GIC           6 :     SPI_freeplan(plan);
    2660 CBC           6 :     pfree(query);
    2661                 : 
    2662               6 :     return stat;
    2663 EUB             : }
    2664                 : 
    2665 ECB             : Datum
    2666 GIC        3432 : ts_stat1(PG_FUNCTION_ARGS)
    2667                 : {
    2668 ECB             :     FuncCallContext *funcctx;
    2669                 :     Datum       result;
    2670                 : 
    2671 GIC        3432 :     if (SRF_IS_FIRSTCALL())
    2672                 :     {
    2673 ECB             :         TSVectorStat *stat;
    2674 CBC           3 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2675                 : 
    2676 GIC           3 :         funcctx = SRF_FIRSTCALL_INIT();
    2677               3 :         SPI_connect();
    2678               3 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
    2679 CBC           3 :         PG_FREE_IF_COPY(txt, 0);
    2680               3 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2681 GIC           3 :         SPI_finish();
    2682 ECB             :     }
    2683                 : 
    2684 GIC        3432 :     funcctx = SRF_PERCALL_SETUP();
    2685            3432 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2686 CBC        3429 :         SRF_RETURN_NEXT(funcctx, result);
    2687 GIC           3 :     SRF_RETURN_DONE(funcctx);
    2688 ECB             : }
    2689                 : 
    2690                 : Datum
    2691 GIC           6 : ts_stat2(PG_FUNCTION_ARGS)
    2692                 : {
    2693 ECB             :     FuncCallContext *funcctx;
    2694                 :     Datum       result;
    2695                 : 
    2696 CBC           6 :     if (SRF_IS_FIRSTCALL())
    2697 ECB             :     {
    2698                 :         TSVectorStat *stat;
    2699 GIC           3 :         text       *txt = PG_GETARG_TEXT_PP(0);
    2700 CBC           3 :         text       *ws = PG_GETARG_TEXT_PP(1);
    2701                 : 
    2702 GIC           3 :         funcctx = SRF_FIRSTCALL_INIT();
    2703               3 :         SPI_connect();
    2704 CBC           3 :         stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
    2705 GIC           3 :         PG_FREE_IF_COPY(txt, 0);
    2706               3 :         PG_FREE_IF_COPY(ws, 1);
    2707               3 :         ts_setup_firstcall(fcinfo, funcctx, stat);
    2708               3 :         SPI_finish();
    2709 ECB             :     }
    2710                 : 
    2711 CBC           6 :     funcctx = SRF_PERCALL_SETUP();
    2712 GIC           6 :     if ((result = ts_process_call(funcctx)) != (Datum) 0)
    2713 CBC           3 :         SRF_RETURN_NEXT(funcctx, result);
    2714 GIC           3 :     SRF_RETURN_DONE(funcctx);
    2715                 : }
    2716                 : 
    2717                 : 
    2718                 : /*
    2719                 :  * Triggers for automatic update of a tsvector column from text column(s)
    2720                 :  *
    2721 ECB             :  * Trigger arguments are either
    2722                 :  *      name of tsvector col, name of tsconfig to use, name(s) of text col(s)
    2723                 :  *      name of tsvector col, name of regconfig col, name(s) of text col(s)
    2724                 :  * i.e., tsconfig can either be specified by name, or indirectly as the
    2725                 :  * contents of a regconfig field in the row.  If the name is used, it must
    2726                 :  * be explicitly schema-qualified.
    2727                 :  */
    2728                 : Datum
    2729 CBC           9 : tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
    2730 ECB             : {
    2731 GIC           9 :     return tsvector_update_trigger(fcinfo, false);
    2732 ECB             : }
    2733                 : 
    2734                 : Datum
    2735 LBC           0 : tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
    2736                 : {
    2737               0 :     return tsvector_update_trigger(fcinfo, true);
    2738                 : }
    2739                 : 
    2740 ECB             : static Datum
    2741 GIC           9 : tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
    2742                 : {
    2743                 :     TriggerData *trigdata;
    2744 ECB             :     Trigger    *trigger;
    2745                 :     Relation    rel;
    2746 CBC           9 :     HeapTuple   rettuple = NULL;
    2747                 :     int         tsvector_attr_num,
    2748                 :                 i;
    2749                 :     ParsedText  prs;
    2750                 :     Datum       datum;
    2751                 :     bool        isnull;
    2752 ECB             :     text       *txt;
    2753                 :     Oid         cfgId;
    2754 EUB             :     bool        update_needed;
    2755                 : 
    2756 ECB             :     /* Check call context */
    2757 GIC           9 :     if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
    2758 UBC           0 :         elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
    2759                 : 
    2760 CBC           9 :     trigdata = (TriggerData *) fcinfo->context;
    2761 GIC           9 :     if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
    2762 LBC           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for row");
    2763 CBC           9 :     if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
    2764 LBC           0 :         elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
    2765                 : 
    2766 GBC           9 :     if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
    2767                 :     {
    2768 GIC           6 :         rettuple = trigdata->tg_trigtuple;
    2769               6 :         update_needed = true;
    2770 ECB             :     }
    2771 CBC           3 :     else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
    2772                 :     {
    2773               3 :         rettuple = trigdata->tg_newtuple;
    2774 GIC           3 :         update_needed = false;  /* computed below */
    2775                 :     }
    2776                 :     else
    2777 LBC           0 :         elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
    2778 ECB             : 
    2779 GIC           9 :     trigger = trigdata->tg_trigger;
    2780 CBC           9 :     rel = trigdata->tg_relation;
    2781                 : 
    2782               9 :     if (trigger->tgnargs < 3)
    2783 UIC           0 :         elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
    2784 ECB             : 
    2785                 :     /* Find the target tsvector column */
    2786 CBC           9 :     tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
    2787               9 :     if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
    2788 LBC           0 :         ereport(ERROR,
    2789                 :                 (errcode(ERRCODE_UNDEFINED_COLUMN),
    2790 ECB             :                  errmsg("tsvector column \"%s\" does not exist",
    2791                 :                         trigger->tgargs[0])));
    2792 EUB             :     /* This will effectively reject system columns, so no separate test: */
    2793 GIC           9 :     if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
    2794 EUB             :                            TSVECTOROID))
    2795 UBC           0 :         ereport(ERROR,
    2796 EUB             :                 (errcode(ERRCODE_DATATYPE_MISMATCH),
    2797                 :                  errmsg("column \"%s\" is not of tsvector type",
    2798                 :                         trigger->tgargs[0])));
    2799                 : 
    2800                 :     /* Find the configuration to use */
    2801 GBC           9 :     if (config_column)
    2802                 :     {
    2803                 :         int         config_attr_num;
    2804 ECB             : 
    2805 UIC           0 :         config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
    2806               0 :         if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
    2807               0 :             ereport(ERROR,
    2808 ECB             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2809                 :                      errmsg("configuration column \"%s\" does not exist",
    2810                 :                             trigger->tgargs[1])));
    2811 UIC           0 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
    2812 ECB             :                                REGCONFIGOID))
    2813 UIC           0 :             ereport(ERROR,
    2814 ECB             :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2815                 :                      errmsg("column \"%s\" is not of regconfig type",
    2816                 :                             trigger->tgargs[1])));
    2817                 : 
    2818 UIC           0 :         datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
    2819               0 :         if (isnull)
    2820 LBC           0 :             ereport(ERROR,
    2821 ECB             :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
    2822                 :                      errmsg("configuration column \"%s\" must not be null",
    2823                 :                             trigger->tgargs[1])));
    2824 LBC           0 :         cfgId = DatumGetObjectId(datum);
    2825 ECB             :     }
    2826                 :     else
    2827                 :     {
    2828                 :         List       *names;
    2829                 : 
    2830 GNC           9 :         names = stringToQualifiedNameList(trigger->tgargs[1], NULL);
    2831                 :         /* require a schema so that results are not search path dependent */
    2832 GIC           9 :         if (list_length(names) < 2)
    2833 LBC           0 :             ereport(ERROR,
    2834                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    2835                 :                      errmsg("text search configuration name \"%s\" must be schema-qualified",
    2836                 :                             trigger->tgargs[1])));
    2837 GIC           9 :         cfgId = get_ts_config_oid(names, false);
    2838 ECB             :     }
    2839                 : 
    2840                 :     /* initialize parse state */
    2841 CBC           9 :     prs.lenwords = 32;
    2842 GIC           9 :     prs.curwords = 0;
    2843 CBC           9 :     prs.pos = 0;
    2844               9 :     prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
    2845 ECB             : 
    2846                 :     /* find all words in indexable column(s) */
    2847 CBC          18 :     for (i = 2; i < trigger->tgnargs; i++)
    2848 ECB             :     {
    2849                 :         int         numattr;
    2850                 : 
    2851 CBC           9 :         numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
    2852               9 :         if (numattr == SPI_ERROR_NOATTRIBUTE)
    2853 LBC           0 :             ereport(ERROR,
    2854 ECB             :                     (errcode(ERRCODE_UNDEFINED_COLUMN),
    2855                 :                      errmsg("column \"%s\" does not exist",
    2856                 :                             trigger->tgargs[i])));
    2857 GIC           9 :         if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
    2858 LBC           0 :             ereport(ERROR,
    2859                 :                     (errcode(ERRCODE_DATATYPE_MISMATCH),
    2860                 :                      errmsg("column \"%s\" is not of a character type",
    2861                 :                             trigger->tgargs[i])));
    2862                 : 
    2863 CBC           9 :         if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
    2864 GIC           3 :             update_needed = true;
    2865                 : 
    2866 CBC           9 :         datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
    2867               9 :         if (isnull)
    2868 GIC           3 :             continue;
    2869 ECB             : 
    2870 CBC           6 :         txt = DatumGetTextPP(datum);
    2871 ECB             : 
    2872 CBC           6 :         parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
    2873 ECB             : 
    2874 CBC           6 :         if (txt != (text *) DatumGetPointer(datum))
    2875 LBC           0 :             pfree(txt);
    2876                 :     }
    2877                 : 
    2878 CBC           9 :     if (update_needed)
    2879 ECB             :     {
    2880                 :         /* make tsvector value */
    2881 CBC           9 :         datum = TSVectorGetDatum(make_tsvector(&prs));
    2882 GIC           9 :         isnull = false;
    2883                 : 
    2884                 :         /* and insert it into tuple */
    2885               9 :         rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
    2886                 :                                              1, &tsvector_attr_num,
    2887                 :                                              &datum, &isnull);
    2888                 : 
    2889               9 :         pfree(DatumGetPointer(datum));
    2890                 :     }
    2891                 : 
    2892               9 :     return PointerGetDatum(rettuple);
    2893                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a