Age Owner TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsquery.c
4 : * I/O functions for tsquery
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsquery.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "libpq/pqformat.h"
18 : #include "miscadmin.h"
19 : #include "nodes/miscnodes.h"
20 : #include "tsearch/ts_locale.h"
21 : #include "tsearch/ts_type.h"
22 : #include "tsearch/ts_utils.h"
23 : #include "utils/builtins.h"
24 : #include "utils/memutils.h"
25 : #include "utils/pg_crc.h"
26 : #include "varatt.h"
27 :
28 : /* FTS operator priorities, see ts_type.h */
/*
 * One entry per operator, listed in the order OP_NOT, OP_AND, OP_OR,
 * OP_PHRASE.  OP_NOT has the highest priority and OP_OR the lowest; the
 * parser in this file looks these values up through OP_PRIORITY() when it
 * decides which stacked operators to emit.
 */
29 : const int tsearch_op_priority[OP_COUNT] =
30 : {
31 : 4, /* OP_NOT */
32 : 2, /* OP_AND */
33 : 1, /* OP_OR */
34 : 3 /* OP_PHRASE */
35 : };
36 :
37 : /*
38 : * parser's states
 *
 * The tokenizers switch on this value to decide what kind of token may
 * come next.  parse_tsquery() starts every parse in WAITFIRSTOPERAND.
39 : */
40 : typedef enum
41 : {
42 : WAITOPERAND = 1, /* an operand (or a prefix operator) is expected */
43 : WAITOPERATOR = 2, /* an operand was just read; expect an operator */
44 : WAITFIRSTOPERAND = 3 /* like WAITOPERAND, but nothing was read yet */
45 : } ts_parserstate;
46 :
47 : /*
48 : * token types for parsing
 *
 * These are the values a ts_tokenizer function returns to makepol().
49 : */
50 : typedef enum
51 : {
52 : PT_END = 0, /* no more tokens */
53 : PT_ERR = 1, /* parse error (possibly already saved as soft error) */
54 : PT_VAL = 2, /* an operand value */
55 : PT_OPR = 3, /* an operator (OP_* code) */
56 : PT_OPEN = 4, /* opening parenthesis */
57 : PT_CLOSE = 5 /* closing parenthesis */
58 : } ts_tokentype;
59 :
60 : /*
61 : * get token from query string
62 : *
63 : * All arguments except "state" are output arguments.
 *
 * Implementations consume input by advancing state->buf and record what
 * they expect next in state->state.
64 : *
65 : * If return value is PT_OPR, then *operator is filled with an OP_* code
66 : * and *weight will contain a distance value in case of phrase operator.
67 : *
68 : * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
69 : * are filled.
70 : *
71 : * If PT_ERR is returned then a soft error has occurred. If state->escontext
72 : * isn't already filled then this should be reported as a generic parse error.
73 : */
74 : typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
75 : int *lenval, char **strval,
76 : int16 *weight, bool *prefix);
77 :
/*
 * Working state of one parse_tsquery() call.  It is shared by the tokenizer
 * functions, makepol() and the push* functions.
 */
78 : struct TSQueryParserStateData
79 : {
80 : /* Tokenizer used for parsing tsquery */
81 : ts_tokenizer gettoken;
82 :
83 : /* State of tokenizer function */
84 : char *buffer; /* entire string we are scanning */
85 : char *buf; /* current scan point */
86 : int count; /* nesting count, incremented by (,
87 : * decremented by ) */
88 : ts_parserstate state;
89 :
90 : /* polish (prefix) notation in list, filled in by push* functions */
91 : List *polstr;
92 :
93 : /*
94 : * Strings from operands are collected in op. curop is a pointer to the
95 : * end of used space of op.
 * Each operand is stored followed by a terminating '\0'; pushValue()
 * doubles the buffer when it runs out of room.
96 : */
97 : char *op;
98 : char *curop;
99 : int lenop; /* allocated size of op */
100 : int sumlen; /* used size of op */
101 :
102 : /* state for value's parser */
103 : TSVectorParseState valstate;
104 :
105 : /* context object for soft errors - must match valstate's escontext */
106 : Node *escontext;
107 : };
108 :
109 : /*
110 : * subroutine to parse the modifiers (weight and prefix flag currently)
111 : * part, like ':AB*' of a query.
112 : */
113 : static char *
5441 tgl 114 GIC 3603 : get_modifiers(char *buf, int16 *weight, bool *prefix)
115 : {
5710 116 3603 : *weight = 0;
5441 117 3603 : *prefix = false;
118 :
5710 119 3603 : if (!t_iseq(buf, ':'))
120 3285 : return buf;
121 :
122 318 : buf++;
123 744 : while (*buf && pg_mblen(buf) == 1)
124 : {
5710 tgl 125 CBC 534 : switch (*buf)
126 : {
127 117 : case 'a':
5710 tgl 128 ECB : case 'A':
5710 tgl 129 GIC 117 : *weight |= 1 << 3;
5710 tgl 130 CBC 117 : break;
131 33 : case 'b':
132 : case 'B':
133 33 : *weight |= 1 << 2;
134 33 : break;
5710 tgl 135 GIC 57 : case 'c':
5710 tgl 136 ECB : case 'C':
5710 tgl 137 GIC 57 : *weight |= 1 << 1;
5710 tgl 138 CBC 57 : break;
5710 tgl 139 GIC 60 : case 'd':
5710 tgl 140 ECB : case 'D':
5710 tgl 141 CBC 60 : *weight |= 1;
142 60 : break;
5441 tgl 143 GIC 159 : case '*':
5441 tgl 144 CBC 159 : *prefix = true;
145 159 : break;
5710 146 108 : default:
5710 tgl 147 GIC 108 : return buf;
5710 tgl 148 ECB : }
5710 tgl 149 CBC 426 : buf++;
5710 tgl 150 ECB : }
151 :
5710 tgl 152 CBC 210 : return buf;
5710 tgl 153 ECB : }
154 :
2558 teodor 155 : /*
156 : * Parse phrase operator. The operator
157 : * may take the following forms:
158 : *
159 : * a <N> b (distance is exactly N lexemes)
160 : * a <-> b (default distance = 1)
161 : *
162 : * The buffer should begin with '<' char
163 : */
164 : static bool
1830 teodor 165 GIC 4539 : parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
166 : {
167 : enum
168 : {
169 : PHRASE_OPEN = 0,
170 : PHRASE_DIST,
171 : PHRASE_CLOSE,
172 : PHRASE_FINISH
2495 rhaas 173 4539 : } state = PHRASE_OPEN;
1830 teodor 174 4539 : char *ptr = pstate->buf;
175 : char *endptr;
2113 tgl 176 CBC 4539 : long l = 1; /* default distance */
177 :
2558 teodor 178 GIC 11682 : while (*ptr)
179 : {
2495 rhaas 180 5492 : switch (state)
181 : {
2558 teodor 182 2888 : case PHRASE_OPEN:
1830 183 2888 : if (t_iseq(ptr, '<'))
1830 teodor 184 ECB : {
1830 teodor 185 CBC 870 : state = PHRASE_DIST;
1830 teodor 186 GIC 870 : ptr++;
1830 teodor 187 ECB : }
188 : else
1830 teodor 189 CBC 2018 : return false;
2558 teodor 190 GIC 870 : break;
2558 teodor 191 ECB :
2558 teodor 192 GIC 870 : case PHRASE_DIST:
2558 teodor 193 CBC 870 : if (t_iseq(ptr, '-'))
2558 teodor 194 ECB : {
2558 teodor 195 GIC 723 : state = PHRASE_CLOSE;
2558 teodor 196 CBC 723 : ptr++;
1830 197 723 : continue;
198 : }
199 :
2113 tgl 200 147 : if (!t_isdigit(ptr))
1830 teodor 201 LBC 0 : return false;
202 :
2113 tgl 203 CBC 147 : errno = 0;
2558 teodor 204 147 : l = strtol(ptr, &endptr, 10);
2558 teodor 205 GIC 147 : if (ptr == endptr)
1830 teodor 206 LBC 0 : return false;
2113 tgl 207 CBC 147 : else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
103 tgl 208 GNC 3 : ereturn(pstate->escontext, false,
209 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
210 : errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
2558 teodor 211 ECB : MAXENTRYPOS)));
2558 teodor 212 EUB : else
213 : {
2558 teodor 214 CBC 144 : state = PHRASE_CLOSE;
215 144 : ptr = endptr;
2558 teodor 216 ECB : }
2558 teodor 217 GBC 144 : break;
2558 teodor 218 ECB :
2558 teodor 219 CBC 867 : case PHRASE_CLOSE:
2558 teodor 220 GIC 867 : if (t_iseq(ptr, '>'))
221 : {
222 867 : state = PHRASE_FINISH;
223 867 : ptr++;
224 : }
2558 teodor 225 ECB : else
1830 teodor 226 LBC 0 : return false;
2558 teodor 227 GIC 867 : break;
2558 teodor 228 ECB :
2558 teodor 229 GIC 867 : case PHRASE_FINISH:
2558 teodor 230 CBC 867 : *distance = (int16) l;
1830 231 867 : pstate->buf = ptr;
1830 teodor 232 GIC 867 : return true;
2558 teodor 233 ECB : }
234 : }
235 :
1830 teodor 236 GIC 1651 : return false;
2558 teodor 237 EUB : }
2558 teodor 238 ECB :
239 : /*
1830 240 : * Parse OR operator used in websearch_to_tsquery(), returns true if we
241 : * believe that "OR" literal could be an operator OR
5693 242 : */
1830 243 : static bool
1830 teodor 244 GIC 771 : parse_or_operator(TSQueryParserState pstate)
245 : {
1809 tgl 246 771 : char *ptr = pstate->buf;
1830 teodor 247 ECB :
248 : /* it should begin with "OR" literal */
1830 teodor 249 GIC 771 : if (pg_strncasecmp(ptr, "or", 2) != 0)
250 699 : return false;
251 :
252 72 : ptr += 2;
253 :
254 : /*
1809 tgl 255 ECB : * it shouldn't be a part of any word but somewhere later it should be
256 : * some operand
1830 teodor 257 : */
1809 tgl 258 GIC 72 : if (*ptr == '\0') /* no operand */
1830 teodor 259 3 : return false;
1830 teodor 260 ECB :
261 : /* it shouldn't be a part of any word */
185 tgl 262 GNC 69 : if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
1830 teodor 263 CBC 12 : return false;
264 :
265 : for (;;)
266 : {
1830 teodor 267 GIC 57 : ptr += pg_mblen(ptr);
268 :
1809 tgl 269 CBC 57 : if (*ptr == '\0') /* got end of string without operand */
1830 teodor 270 6 : return false;
271 :
272 : /*
1809 tgl 273 ECB : * Suppose, we found an operand, but could be a not correct operand.
274 : * So we still treat OR literal as operation with possibly incorrect
275 : * operand and will not search it as lexeme
276 : */
1830 teodor 277 GIC 51 : if (!t_isspace(ptr))
1830 teodor 278 CBC 51 : break;
279 : }
1830 teodor 280 ECB :
1830 teodor 281 CBC 51 : pstate->buf += 2;
1830 teodor 282 GIC 51 : return true;
283 : }
284 :
285 : static ts_tokentype
286 8745 : gettoken_query_standard(TSQueryParserState state, int8 *operator,
287 : int *lenval, char **strval,
1830 teodor 288 ECB : int16 *weight, bool *prefix)
5710 tgl 289 : {
5441 tgl 290 GIC 8745 : *weight = 0;
291 8745 : *prefix = false;
5441 tgl 292 ECB :
1830 teodor 293 : while (true)
294 : {
5710 tgl 295 GIC 11692 : switch (state->state)
296 : {
5710 tgl 297 CBC 6077 : case WAITFIRSTOPERAND:
298 : case WAITOPERAND:
5710 tgl 299 GIC 6077 : if (t_iseq(state->buf, '!'))
300 : {
1830 teodor 301 CBC 465 : state->buf++;
5710 tgl 302 465 : state->state = WAITOPERAND;
1830 teodor 303 GIC 465 : *operator = OP_NOT;
5693 304 465 : return PT_OPR;
305 : }
5710 tgl 306 CBC 5612 : else if (t_iseq(state->buf, '('))
307 : {
1830 teodor 308 531 : state->buf++;
5710 tgl 309 GIC 531 : state->state = WAITOPERAND;
1830 teodor 310 CBC 531 : state->count++;
5693 teodor 311 GIC 531 : return PT_OPEN;
5710 tgl 312 ECB : }
5710 tgl 313 CBC 5081 : else if (t_iseq(state->buf, ':'))
5710 tgl 314 ECB : {
315 : /* generic syntax error message is fine */
103 tgl 316 UNC 0 : return PT_ERR;
5710 tgl 317 ECB : }
5710 tgl 318 CBC 5081 : else if (!t_isspace(state->buf))
5710 tgl 319 ECB : {
1809 320 : /*
321 : * We rely on the tsvector parser to parse the value for
322 : * us
323 : */
5693 teodor 324 GIC 3615 : reset_tsvector_parser(state->valstate, state->buf);
1830 teodor 325 GBC 3615 : if (gettoken_tsvector(state->valstate, strval, lenval,
326 : NULL, NULL, &state->buf))
5710 tgl 327 ECB : {
5441 tgl 328 GIC 3603 : state->buf = get_modifiers(state->buf, weight, prefix);
5710 329 3603 : state->state = WAITOPERATOR;
5693 teodor 330 3603 : return PT_VAL;
331 : }
103 tgl 332 GNC 12 : else if (SOFT_ERROR_OCCURRED(state->escontext))
333 : {
334 : /* gettoken_tsvector reported a soft error */
103 tgl 335 UNC 0 : return PT_ERR;
336 : }
5710 tgl 337 GIC 12 : else if (state->state == WAITFIRSTOPERAND)
1830 teodor 338 ECB : {
5693 teodor 339 CBC 12 : return PT_END;
340 : }
341 : else
103 tgl 342 UNC 0 : ereturn(state->escontext, PT_ERR,
5710 tgl 343 ECB : (errcode(ERRCODE_SYNTAX_ERROR),
5649 344 : errmsg("no operand in tsquery: \"%s\"",
345 : state->buffer)));
5710 346 : }
5710 tgl 347 GIC 1466 : break;
348 :
5710 tgl 349 GBC 5615 : case WAITOPERATOR:
5693 teodor 350 GIC 5615 : if (t_iseq(state->buf, '&'))
5693 teodor 351 ECB : {
1830 teodor 352 GIC 665 : state->buf++;
5693 teodor 353 CBC 665 : state->state = WAITOPERAND;
5693 teodor 354 GIC 665 : *operator = OP_AND;
355 665 : return PT_OPR;
5693 teodor 356 EUB : }
2558 teodor 357 GIC 4950 : else if (t_iseq(state->buf, '|'))
358 : {
1830 359 411 : state->buf++;
5710 tgl 360 411 : state->state = WAITOPERAND;
5693 teodor 361 CBC 411 : *operator = OP_OR;
5693 teodor 362 GIC 411 : return PT_OPR;
5710 tgl 363 ECB : }
1830 teodor 364 CBC 4539 : else if (parse_phrase_operator(state, weight))
365 : {
1830 teodor 366 ECB : /* weight var is used as storage for distance */
2558 teodor 367 CBC 867 : state->state = WAITOPERAND;
368 867 : *operator = OP_PHRASE;
369 867 : return PT_OPR;
370 : }
103 tgl 371 GNC 3672 : else if (SOFT_ERROR_OCCURRED(state->escontext))
372 : {
373 : /* parse_phrase_operator reported a soft error */
374 3 : return PT_ERR;
375 : }
5710 tgl 376 CBC 3669 : else if (t_iseq(state->buf, ')'))
377 : {
1830 teodor 378 531 : state->buf++;
5710 tgl 379 531 : state->count--;
5693 teodor 380 531 : return (state->count < 0) ? PT_ERR : PT_CLOSE;
5710 tgl 381 ECB : }
1830 teodor 382 GIC 3138 : else if (*state->buf == '\0')
1830 teodor 383 ECB : {
5693 teodor 384 GIC 1651 : return (state->count) ? PT_ERR : PT_END;
385 : }
5710 tgl 386 CBC 1487 : else if (!t_isspace(state->buf))
1830 teodor 387 ECB : {
5693 teodor 388 CBC 6 : return PT_ERR;
389 : }
1830 390 1481 : break;
391 : }
392 :
393 2947 : state->buf += pg_mblen(state->buf);
394 : }
1830 teodor 395 ECB : }
396 :
397 : static ts_tokentype
1830 teodor 398 CBC 1116 : gettoken_query_websearch(TSQueryParserState state, int8 *operator,
1830 teodor 399 ECB : int *lenval, char **strval,
400 : int16 *weight, bool *prefix)
401 : {
1830 teodor 402 GIC 1116 : *weight = 0;
1830 teodor 403 CBC 1116 : *prefix = false;
404 :
1830 teodor 405 ECB : while (true)
406 : {
1830 teodor 407 CBC 1539 : switch (state->state)
408 : {
409 723 : case WAITFIRSTOPERAND:
410 : case WAITOPERAND:
1830 teodor 411 GIC 723 : if (t_iseq(state->buf, '-'))
1830 teodor 412 ECB : {
1830 teodor 413 GIC 33 : state->buf++;
414 33 : state->state = WAITOPERAND;
415 :
416 33 : *operator = OP_NOT;
1830 teodor 417 CBC 33 : return PT_OPR;
418 : }
1830 teodor 419 GIC 690 : else if (t_iseq(state->buf, '"'))
420 : {
706 akorotkov 421 ECB : /* Everything in quotes is processed as a single token */
422 :
423 : /* skip opening quote */
1830 teodor 424 GIC 96 : state->buf++;
706 akorotkov 425 96 : *strval = state->buf;
1830 teodor 426 ECB :
427 : /* iterate to the closing quote or end of the string */
706 akorotkov 428 CBC 870 : while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
706 akorotkov 429 GIC 774 : state->buf++;
706 akorotkov 430 CBC 96 : *lenval = state->buf - *strval;
431 :
706 akorotkov 432 ECB : /* skip closing quote if not end of the string */
706 akorotkov 433 CBC 96 : if (*state->buf != '\0')
706 akorotkov 434 GIC 84 : state->buf++;
1830 teodor 435 ECB :
706 akorotkov 436 CBC 96 : state->state = WAITOPERATOR;
706 akorotkov 437 GIC 96 : state->count++;
706 akorotkov 438 CBC 96 : return PT_VAL;
439 : }
1830 teodor 440 GIC 594 : else if (ISOPERATOR(state->buf))
441 : {
442 : /* or else gettoken_tsvector() will raise an error */
1830 teodor 443 CBC 96 : state->buf++;
444 96 : state->state = WAITOPERAND;
1830 teodor 445 GIC 96 : continue;
446 : }
1830 teodor 447 CBC 498 : else if (!t_isspace(state->buf))
1830 teodor 448 ECB : {
1809 tgl 449 : /*
450 : * We rely on the tsvector parser to parse the value for
451 : * us
452 : */
1830 teodor 453 CBC 450 : reset_tsvector_parser(state->valstate, state->buf);
1830 teodor 454 GIC 450 : if (gettoken_tsvector(state->valstate, strval, lenval,
1830 teodor 455 ECB : NULL, NULL, &state->buf))
456 : {
1830 teodor 457 CBC 441 : state->state = WAITOPERATOR;
1830 teodor 458 GIC 441 : return PT_VAL;
1830 teodor 459 ECB : }
103 tgl 460 GNC 9 : else if (SOFT_ERROR_OCCURRED(state->escontext))
461 : {
462 : /* gettoken_tsvector reported a soft error */
103 tgl 463 UNC 0 : return PT_ERR;
464 : }
1830 teodor 465 GIC 9 : else if (state->state == WAITFIRSTOPERAND)
466 : {
1830 teodor 467 LBC 0 : return PT_END;
1830 teodor 468 ECB : }
469 : else
470 : {
471 : /* finally, we have to provide an operand */
1830 teodor 472 GIC 9 : pushStop(state);
473 9 : return PT_END;
474 : }
475 : }
5710 tgl 476 48 : break;
1830 teodor 477 ECB :
1830 teodor 478 CBC 816 : case WAITOPERATOR:
1830 teodor 479 GIC 816 : if (t_iseq(state->buf, '"'))
480 : {
706 akorotkov 481 ECB : /*
482 : * put implicit AND after an operand and handle this quote
483 : * in WAITOPERAND
484 : */
706 akorotkov 485 GIC 45 : state->state = WAITOPERAND;
486 45 : *operator = OP_AND;
706 akorotkov 487 GBC 45 : return PT_OPR;
488 : }
1830 teodor 489 CBC 771 : else if (parse_or_operator(state))
490 : {
1830 teodor 491 GBC 51 : state->state = WAITOPERAND;
1830 teodor 492 GIC 51 : *operator = OP_OR;
493 51 : return PT_OPR;
494 : }
495 720 : else if (*state->buf == '\0')
1830 teodor 496 ECB : {
5693 teodor 497 CBC 195 : return PT_END;
498 : }
1830 teodor 499 GIC 525 : else if (!t_isspace(state->buf))
1830 teodor 500 ECB : {
501 : /* put implicit AND after an operand */
706 akorotkov 502 CBC 246 : *operator = OP_AND;
1830 teodor 503 246 : state->state = WAITOPERAND;
1830 teodor 504 GIC 246 : return PT_OPR;
505 : }
5710 tgl 506 279 : break;
507 : }
508 :
5710 tgl 509 CBC 327 : state->buf += pg_mblen(state->buf);
5710 tgl 510 ECB : }
511 : }
512 :
1830 teodor 513 : static ts_tokentype
1830 teodor 514 GIC 108 : gettoken_query_plain(TSQueryParserState state, int8 *operator,
1830 teodor 515 ECB : int *lenval, char **strval,
516 : int16 *weight, bool *prefix)
517 : {
1830 teodor 518 GIC 108 : *weight = 0;
1830 teodor 519 CBC 108 : *prefix = false;
520 :
521 108 : if (*state->buf == '\0')
1830 teodor 522 GIC 54 : return PT_END;
1830 teodor 523 ECB :
1830 teodor 524 GIC 54 : *strval = state->buf;
525 54 : *lenval = strlen(state->buf);
1830 teodor 526 CBC 54 : state->buf += *lenval;
527 54 : state->count++;
528 54 : return PT_VAL;
529 : }
1830 teodor 530 ECB :
531 : /*
532 : * Push an operator to state->polstr
5710 tgl 533 : */
534 : void
2558 teodor 535 GIC 3119 : pushOperator(TSQueryParserState state, int8 oper, int16 distance)
536 : {
537 : QueryOperator *tmp;
5693 teodor 538 ECB :
2558 teodor 539 GIC 3119 : Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
540 :
5476 tgl 541 3119 : tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
5693 teodor 542 CBC 3119 : tmp->type = QI_OPR;
543 3119 : tmp->oper = oper;
2558 teodor 544 GIC 3119 : tmp->distance = (oper == OP_PHRASE) ? distance : 0;
5693 teodor 545 ECB : /* left is filled in later with findoprnd */
546 :
5693 teodor 547 GIC 3119 : state->polstr = lcons(tmp, state->polstr);
5693 teodor 548 CBC 3119 : }
5693 teodor 549 ECB :
550 : static void
5441 tgl 551 CBC 4197 : pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
5693 teodor 552 ECB : {
553 : QueryOperand *tmp;
554 :
5710 tgl 555 GIC 4197 : if (distance >= MAXSTRPOS)
103 tgl 556 UNC 0 : ereturn(state->escontext,,
557 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
558 : errmsg("value is too big in tsquery: \"%s\"",
5710 tgl 559 ECB : state->buffer)));
5710 tgl 560 GIC 4197 : if (lenval >= MAXSTRLEN)
103 tgl 561 UNC 0 : ereturn(state->escontext,,
562 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5649 tgl 563 ECB : errmsg("operand is too long in tsquery: \"%s\"",
564 : state->buffer)));
5693 teodor 565 :
5476 tgl 566 CBC 4197 : tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
5693 teodor 567 4197 : tmp->type = QI_VAL;
568 4197 : tmp->weight = weight;
5441 tgl 569 GIC 4197 : tmp->prefix = prefix;
5693 teodor 570 4197 : tmp->valcrc = (int32) valcrc;
5710 tgl 571 CBC 4197 : tmp->length = lenval;
5693 teodor 572 4197 : tmp->distance = distance;
573 :
5693 teodor 574 GIC 4197 : state->polstr = lcons(tmp, state->polstr);
5710 tgl 575 ECB : }
576 :
577 : /*
578 : * Push an operand to state->polstr.
5693 teodor 579 : *
5693 teodor 580 EUB : * strval must point to a string equal to state->curop. lenval is the length
581 : * of the string.
582 : */
583 : void
3940 peter_e 584 CBC 4197 : pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
5710 tgl 585 EUB : {
586 : pg_crc32 valcrc;
587 :
5710 tgl 588 GIC 4197 : if (lenval >= MAXSTRLEN)
103 tgl 589 UNC 0 : ereturn(state->escontext,,
5611 tgl 590 ECB : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5649 591 : errmsg("word is too long in tsquery: \"%s\"",
5710 592 : state->buffer)));
593 :
3078 heikki.linnakangas 594 CBC 4197 : INIT_LEGACY_CRC32(valcrc);
595 14802 : COMP_LEGACY_CRC32(valcrc, strval, lenval);
596 4197 : FIN_LEGACY_CRC32(valcrc);
5441 tgl 597 GIC 4197 : pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
5710 tgl 598 ECB :
599 : /* append the value string to state.op, enlarging buffer if needed first */
5710 tgl 600 GIC 4197 : while (state->curop - state->op + lenval + 1 >= state->lenop)
601 : {
5624 bruce 602 UIC 0 : int used = state->curop - state->op;
603 :
5710 tgl 604 0 : state->lenop *= 2;
61 peter 605 UNC 0 : state->op = (char *) repalloc(state->op, state->lenop);
5693 teodor 606 UIC 0 : state->curop = state->op + used;
607 : }
61 peter 608 GNC 4197 : memcpy(state->curop, strval, lenval);
5710 tgl 609 GIC 4197 : state->curop += lenval;
610 4197 : *(state->curop) = '\0';
611 4197 : state->curop++;
5710 tgl 612 CBC 4197 : state->sumlen += lenval + 1 /* \0 */ ;
5710 tgl 613 EUB : }
614 :
615 :
616 : /*
617 : * Push a stopword placeholder to state->polstr
5693 teodor 618 ECB : */
619 : void
5693 teodor 620 CBC 342 : pushStop(TSQueryParserState state)
5693 teodor 621 ECB : {
622 : QueryOperand *tmp;
623 :
5476 tgl 624 CBC 342 : tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
5693 teodor 625 GIC 342 : tmp->type = QI_VALSTOP;
5693 teodor 626 EUB :
5693 teodor 627 GIC 342 : state->polstr = lcons(tmp, state->polstr);
5693 teodor 628 GBC 342 : }
5693 teodor 629 EUB :
630 :
/* Capacity of the operator stack that each makepol() invocation keeps. */
631 : #define STACKDEPTH 32
5700 tgl 632 ECB :
/* One pending operator on makepol()'s operator stack. */
2428 633 : typedef struct OperatorElement
634 : {
635 : int8 op; /* OP_* code */
636 : int16 distance; /* distance argument, as passed to pushOperator */
637 : } OperatorElement;
638 :
639 : static void
2477 teodor 640 GIC 2783 : pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
641 : {
2428 tgl 642 2783 : if (*lenstack == STACKDEPTH) /* internal error */
2477 teodor 643 UIC 0 : elog(ERROR, "tsquery stack too small");
2477 teodor 644 ECB :
2477 teodor 645 GIC 2783 : stack[*lenstack].op = op;
646 2783 : stack[*lenstack].distance = distance;
647 :
2477 teodor 648 CBC 2783 : (*lenstack)++;
649 2783 : }
650 :
2477 teodor 651 ECB : static void
2477 teodor 652 CBC 5235 : cleanOpStack(TSQueryParserState state,
653 : OperatorElement *stack, int *lenstack, int8 op)
654 : {
2428 tgl 655 GIC 5235 : int opPriority = OP_PRIORITY(op);
656 :
657 8018 : while (*lenstack)
658 : {
659 : /* NOT is right associative unlike to others */
2459 teodor 660 3029 : if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
2118 tgl 661 159 : (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
662 : break;
663 :
2477 teodor 664 CBC 2783 : (*lenstack)--;
2477 teodor 665 GIC 2783 : pushOperator(state, stack[*lenstack].op,
2428 tgl 666 CBC 2783 : stack[*lenstack].distance);
2477 teodor 667 EUB : }
2477 teodor 668 GIC 5235 : }
2477 teodor 669 ECB :
5710 tgl 670 : /*
671 : * Make polish (prefix) notation of query.
5693 teodor 672 : *
673 : * See parse_tsquery for explanation of pushval.
674 : */
675 : static void
5624 bruce 676 CBC 2461 : makepol(TSQueryParserState state,
677 : PushFunction pushval,
678 : Datum opaque)
5710 tgl 679 ECB : {
2495 rhaas 680 GIC 2461 : int8 operator = 0;
2495 rhaas 681 ECB : ts_tokentype type;
2495 rhaas 682 GIC 2461 : int lenval = 0;
683 2461 : char *strval = NULL;
2428 tgl 684 ECB : OperatorElement opstack[STACKDEPTH];
2495 rhaas 685 CBC 2461 : int lenstack = 0;
2495 rhaas 686 GIC 2461 : int16 weight = 0;
687 : bool prefix;
5710 tgl 688 ECB :
5700 689 : /* since this function recurses, it could be driven to stack overflow */
5700 tgl 690 CBC 2461 : check_stack_depth();
691 :
1830 teodor 692 9969 : while ((type = state->gettoken(state, &operator,
693 : &lenval, &strval,
1830 teodor 694 GIC 9969 : &weight, &prefix)) != PT_END)
695 : {
5710 tgl 696 8048 : switch (type)
697 : {
5693 teodor 698 4194 : case PT_VAL:
5441 tgl 699 4194 : pushval(opaque, state, strval, lenval, weight, prefix);
5710 tgl 700 CBC 4194 : break;
5693 teodor 701 GIC 2783 : case PT_OPR:
2477 702 2783 : cleanOpStack(state, opstack, &lenstack, operator);
703 2783 : pushOpStack(opstack, &lenstack, operator, weight);
5710 tgl 704 CBC 2783 : break;
5693 teodor 705 GIC 531 : case PT_OPEN:
5693 teodor 706 CBC 531 : makepol(state, pushval, opaque);
5710 tgl 707 531 : break;
5693 teodor 708 GIC 531 : case PT_CLOSE:
2428 tgl 709 CBC 531 : cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
5693 teodor 710 540 : return;
5693 teodor 711 GIC 9 : case PT_ERR:
712 : default:
713 : /* don't overwrite a soft error saved by gettoken function */
103 tgl 714 GNC 9 : if (!SOFT_ERROR_OCCURRED(state->escontext))
715 6 : errsave(state->escontext,
716 : (errcode(ERRCODE_SYNTAX_ERROR),
717 : errmsg("syntax error in tsquery: \"%s\"",
718 : state->buffer)));
719 9 : return;
720 : }
721 : /* detect soft error in pushval or recursion */
722 7508 : if (SOFT_ERROR_OCCURRED(state->escontext))
103 tgl 723 UNC 0 : return;
5710 tgl 724 ECB : }
725 :
2428 tgl 726 CBC 1921 : cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
727 : }
5710 tgl 728 ECB :
729 : static void
2558 teodor 730 CBC 7649 : findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
5710 tgl 731 ECB : {
5693 teodor 732 : /* since this function recurses, it could be driven to stack overflow. */
5693 teodor 733 CBC 7649 : check_stack_depth();
5693 teodor 734 ECB :
5693 teodor 735 CBC 7649 : if (*pos >= nnodes)
5611 tgl 736 LBC 0 : elog(ERROR, "malformed tsquery: operand not found");
5693 teodor 737 ECB :
2558 teodor 738 CBC 7649 : if (ptr[*pos].type == QI_VAL)
2558 teodor 739 ECB : {
2558 teodor 740 CBC 4188 : (*pos)++;
2558 teodor 741 ECB : }
2558 teodor 742 GIC 3461 : else if (ptr[*pos].type == QI_VALSTOP)
743 : {
2495 rhaas 744 CBC 342 : *needcleanup = true; /* we'll have to remove stop words */
5710 tgl 745 342 : (*pos)++;
746 : }
747 : else
748 : {
5693 teodor 749 3119 : Assert(ptr[*pos].type == QI_OPR);
750 :
5015 peter_e 751 GIC 3119 : if (ptr[*pos].qoperator.oper == OP_NOT)
5693 teodor 752 ECB : {
2118 tgl 753 GBC 498 : ptr[*pos].qoperator.left = 1; /* fixed offset */
5693 teodor 754 GIC 498 : (*pos)++;
755 :
2558 teodor 756 ECB : /* process the only argument */
2558 teodor 757 GIC 498 : findoprnd_recurse(ptr, pos, nnodes, needcleanup);
758 : }
759 : else
5693 teodor 760 ECB : {
2495 rhaas 761 GIC 2621 : QueryOperator *curitem = &ptr[*pos].qoperator;
2118 tgl 762 2621 : int tmp = *pos; /* save current position */
2558 teodor 763 ECB :
2558 teodor 764 GIC 2621 : Assert(curitem->oper == OP_AND ||
2558 teodor 765 ECB : curitem->oper == OP_OR ||
2558 teodor 766 EUB : curitem->oper == OP_PHRASE);
767 :
5693 teodor 768 CBC 2621 : (*pos)++;
769 :
2558 teodor 770 ECB : /* process RIGHT argument */
2558 teodor 771 GIC 2621 : findoprnd_recurse(ptr, pos, nnodes, needcleanup);
2300 tgl 772 ECB :
2558 teodor 773 GIC 2621 : curitem->left = *pos - tmp; /* set LEFT arg's offset */
2558 teodor 774 ECB :
775 : /* process LEFT argument */
2558 teodor 776 GIC 2621 : findoprnd_recurse(ptr, pos, nnodes, needcleanup);
777 : }
778 : }
5710 tgl 779 CBC 7649 : }
780 :
5693 teodor 781 ECB :
782 : /*
2300 tgl 783 : * Fill in the left-fields previously left unfilled.
784 : * The input QueryItems must be in polish (prefix) notation.
785 : * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
786 : */
5693 teodor 787 : static void
2558 teodor 788 GIC 1909 : findoprnd(QueryItem *ptr, int size, bool *needcleanup)
789 : {
790 : uint32 pos;
5693 teodor 791 ECB :
2558 teodor 792 CBC 1909 : *needcleanup = false;
5693 teodor 793 GIC 1909 : pos = 0;
2558 teodor 794 CBC 1909 : findoprnd_recurse(ptr, &pos, size, needcleanup);
795 :
5693 teodor 796 GIC 1909 : if (pos != size)
5611 tgl 797 UIC 0 : elog(ERROR, "malformed tsquery: extra nodes");
5693 teodor 798 CBC 1909 : }
799 :
800 :
5710 tgl 801 ECB : /*
802 : * Parse the tsquery stored in "buf".
803 : *
804 : * Each value (operand) in the query is passed to pushval. pushval can
5693 teodor 805 : * transform the simple value to an arbitrarily complex expression using
806 : * pushValue and pushOperator. It must push a single value with pushValue,
807 : * a complete expression with all operands, or a stopword placeholder
808 : * with pushStop, otherwise the prefix notation representation will be broken,
809 : * having an operator with no operand.
810 : *
5624 bruce 811 : * opaque is passed on to pushval as is, pushval can use it to store its
812 : * private state.
813 : *
814 : * The pushval function can record soft errors via escontext.
815 : * Callers must check SOFT_ERROR_OCCURRED to detect that.
816 : *
817 : * A bitmask of flags (see ts_utils.h) and an error context object
818 : * can be provided as well. If a soft error occurs, NULL is returned.
819 : */
820 : TSQuery
5624 bruce 821 GIC 1930 : parse_tsquery(char *buf,
822 : PushFunction pushval,
823 : Datum opaque,
824 : int flags,
825 : Node *escontext)
826 : {
5693 teodor 827 ECB : struct TSQueryParserStateData state;
828 : int i;
829 : TSQuery query;
830 : int commonlen;
5710 tgl 831 : QueryItem *ptr;
5693 teodor 832 : ListCell *cell;
833 : bool noisy;
2558 834 : bool needcleanup;
1830 teodor 835 GIC 1930 : int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
1830 teodor 836 ECB :
1830 teodor 837 EUB : /* plain should not be used with web */
1830 teodor 838 CBC 1930 : Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
839 :
840 : /* select suitable tokenizer */
1830 teodor 841 GIC 1930 : if (flags & P_TSQ_PLAIN)
842 54 : state.gettoken = gettoken_query_plain;
843 1876 : else if (flags & P_TSQ_WEB)
844 : {
845 204 : state.gettoken = gettoken_query_websearch;
846 204 : tsv_flags |= P_TSV_IS_WEB;
847 : }
848 : else
849 1672 : state.gettoken = gettoken_query_standard;
850 :
851 : /* emit nuisance NOTICEs only if not doing soft errors */
103 tgl 852 GNC 1930 : noisy = !(escontext && IsA(escontext, ErrorSaveContext));
853 :
854 : /* init state */
5710 tgl 855 GIC 1930 : state.buffer = buf;
856 1930 : state.buf = buf;
857 1930 : state.count = 0;
1830 teodor 858 1930 : state.state = WAITFIRSTOPERAND;
5693 859 1930 : state.polstr = NIL;
103 tgl 860 GNC 1930 : state.escontext = escontext;
861 :
862 : /* init value parser's state */
863 1930 : state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
864 :
5710 tgl 865 ECB : /* init list of operand */
5710 tgl 866 GIC 1930 : state.sumlen = 0;
867 1930 : state.lenop = 64;
868 1930 : state.curop = state.op = (char *) palloc(state.lenop);
869 1930 : *(state.curop) = '\0';
870 :
871 : /* parse query & make polish notation (postfix, but in reverse order) */
5693 teodor 872 1930 : makepol(&state, pushval, opaque);
873 :
874 1930 : close_tsvector_parser(state.valstate);
875 :
103 tgl 876 GNC 1930 : if (SOFT_ERROR_OCCURRED(escontext))
877 9 : return NULL;
878 :
235 879 1921 : if (state.polstr == NIL)
880 : {
103 881 12 : if (noisy)
882 12 : ereport(NOTICE,
883 : (errmsg("text-search query doesn't contain lexemes: \"%s\"",
884 : state.buffer)));
5710 tgl 885 GIC 12 : query = (TSQuery) palloc(HDRSIZETQ);
5710 tgl 886 CBC 12 : SET_VARSIZE(query, HDRSIZETQ);
5710 tgl 887 GIC 12 : query->size = 0;
888 12 : return query;
5710 tgl 889 ECB : }
890 :
3338 noah 891 CBC 1909 : if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
103 tgl 892 UNC 0 : ereturn(escontext, NULL,
3338 noah 893 ECB : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
894 : errmsg("tsquery is too large")));
5693 teodor 895 GIC 1909 : commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
896 :
3338 noah 897 ECB : /* Pack the QueryItems in the final TSQuery struct to return to caller */
5693 teodor 898 GIC 1909 : query = (TSQuery) palloc0(commonlen);
5710 tgl 899 1909 : SET_VARSIZE(query, commonlen);
5693 teodor 900 CBC 1909 : query->size = list_length(state.polstr);
5710 tgl 901 GIC 1909 : ptr = GETQUERY(query);
902 :
5693 teodor 903 ECB : /* Copy QueryItems to TSQuery */
5693 teodor 904 CBC 1909 : i = 0;
905 9558 : foreach(cell, state.polstr)
5710 tgl 906 ECB : {
2495 rhaas 907 CBC 7649 : QueryItem *item = (QueryItem *) lfirst(cell);
5693 teodor 908 ECB :
5624 bruce 909 GIC 7649 : switch (item->type)
910 : {
5693 teodor 911 CBC 4188 : case QI_VAL:
5693 teodor 912 GIC 4188 : memcpy(&ptr[i], item, sizeof(QueryOperand));
913 4188 : break;
5693 teodor 914 CBC 342 : case QI_VALSTOP:
915 342 : ptr[i].type = QI_VALSTOP;
916 342 : break;
917 3119 : case QI_OPR:
5693 teodor 918 GIC 3119 : memcpy(&ptr[i], item, sizeof(QueryOperator));
919 3119 : break;
5693 teodor 920 LBC 0 : default:
5611 tgl 921 UIC 0 : elog(ERROR, "unrecognized QueryItem type: %d", item->type);
5693 teodor 922 ECB : }
5693 teodor 923 GIC 7649 : i++;
5710 tgl 924 ECB : }
925 :
926 : /* Copy all the operand strings to TSQuery */
61 peter 927 GNC 1909 : memcpy(GETOPERAND(query), state.op, state.sumlen);
5710 tgl 928 GIC 1909 : pfree(state.op);
5710 tgl 929 ECB :
2300 930 : /*
931 : * Set left operand pointers for every operator. While we're at it,
932 : * detect whether there are any QI_VALSTOP nodes.
933 : */
2558 teodor 934 CBC 1909 : findoprnd(ptr, query->size, &needcleanup);
2558 teodor 935 ECB :
936 : /*
937 : * If there are QI_VALSTOP nodes, delete them and simplify the tree.
938 : */
2558 teodor 939 CBC 1909 : if (needcleanup)
103 tgl 940 GNC 222 : query = cleanup_tsquery_stopwords(query, noisy);
941 :
5710 tgl 942 GIC 1909 : return query;
5710 tgl 943 ECB : }
944 :
945 : static void
5690 teodor 946 CBC 2650 : pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
5441 tgl 947 ECB : int16 weight, bool prefix)
5693 teodor 948 : {
5441 tgl 949 CBC 2650 : pushValue(state, strval, lenval, weight, prefix);
5693 teodor 950 GIC 2650 : }
951 :
5710 tgl 952 ECB : /*
953 : * in without morphology
954 : */
955 : Datum
5710 tgl 956 GIC 1295 : tsqueryin(PG_FUNCTION_ARGS)
5710 tgl 957 ECB : {
5710 tgl 958 GIC 1295 : char *in = PG_GETARG_CSTRING(0);
103 tgl 959 GNC 1295 : Node *escontext = fcinfo->context;
5710 tgl 960 ECB :
103 tgl 961 GNC 1295 : PG_RETURN_TSQUERY(parse_tsquery(in,
962 : pushval_asis,
963 : PointerGetDatum(NULL),
964 : 0,
965 : escontext));
5710 tgl 966 ECB : }
967 :
968 : /*
969 : * out function
970 : */
971 : typedef struct
972 : {
5710 tgl 973 EUB : QueryItem *curpol;
974 : char *buf;
975 : char *cur;
5710 tgl 976 ECB : char *op;
977 : int buflen;
978 : } INFIX;
979 :
5693 teodor 980 : /* Makes sure inf->buf is large enough for adding 'addsize' bytes */
981 : #define RESIZEBUF(inf, addsize) \
982 : while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
983 : { \
984 : int len = (inf)->cur - (inf)->buf; \
985 : (inf)->buflen *= 2; \
986 : (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
5710 tgl 987 : (inf)->cur = (inf)->buf + len; \
988 : }
989 :
990 : /*
991 : * recursively traverse the tree and
2558 teodor 992 : * print it in infix (human-readable) form
5710 tgl 993 : */
994 : static void
2477 teodor 995 CBC 3623 : infix(INFIX *in, int parentPriority, bool rightPhraseOp)
996 : {
997 : /* since this function recurses, it could be driven to stack overflow. */
5693 teodor 998 GIC 3623 : check_stack_depth();
5693 teodor 999 ECB :
5693 teodor 1000 GIC 3623 : if (in->curpol->type == QI_VAL)
1001 : {
5015 peter_e 1002 CBC 2096 : QueryOperand *curpol = &in->curpol->qoperand;
5693 teodor 1003 2096 : char *op = in->op + curpol->distance;
1004 : int clen;
1005 :
5441 tgl 1006 GIC 3420 : RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
5710 1007 2096 : *(in->cur) = '\'';
1008 2096 : in->cur++;
5710 tgl 1009 CBC 8128 : while (*op)
1010 : {
1011 6032 : if (t_iseq(op, '\''))
5710 tgl 1012 ECB : {
5710 tgl 1013 GIC 6 : *(in->cur) = '\'';
5710 tgl 1014 CBC 6 : in->cur++;
1015 : }
5623 teodor 1016 GIC 6026 : else if (t_iseq(op, '\\'))
1017 : {
1018 3 : *(in->cur) = '\\';
1019 3 : in->cur++;
1020 : }
5710 tgl 1021 6032 : COPYCHAR(in->cur, op);
1022 :
1023 6032 : clen = pg_mblen(op);
1024 6032 : op += clen;
1025 6032 : in->cur += clen;
1026 : }
1027 2096 : *(in->cur) = '\'';
1028 2096 : in->cur++;
5441 1029 2096 : if (curpol->weight || curpol->prefix)
1030 : {
5710 1031 87 : *(in->cur) = ':';
1032 87 : in->cur++;
5050 bruce 1033 87 : if (curpol->prefix)
1034 : {
5441 tgl 1035 12 : *(in->cur) = '*';
1036 12 : in->cur++;
1037 : }
5693 teodor 1038 87 : if (curpol->weight & (1 << 3))
1039 : {
5710 tgl 1040 30 : *(in->cur) = 'A';
1041 30 : in->cur++;
1042 : }
5693 teodor 1043 87 : if (curpol->weight & (1 << 2))
1044 : {
5710 tgl 1045 48 : *(in->cur) = 'B';
1046 48 : in->cur++;
1047 : }
5693 teodor 1048 CBC 87 : if (curpol->weight & (1 << 1))
1049 : {
5710 tgl 1050 GIC 9 : *(in->cur) = 'C';
5710 tgl 1051 CBC 9 : in->cur++;
1052 : }
5693 teodor 1053 87 : if (curpol->weight & 1)
1054 : {
5710 tgl 1055 3 : *(in->cur) = 'D';
1056 3 : in->cur++;
1057 : }
1058 : }
1059 2096 : *(in->cur) = '\0';
1060 2096 : in->curpol++;
5710 tgl 1061 ECB : }
5015 peter_e 1062 CBC 1527 : else if (in->curpol->qoperator.oper == OP_NOT)
1063 : {
2477 teodor 1064 186 : int priority = QO_PRIORITY(in->curpol);
1065 :
2558 1066 186 : if (priority < parentPriority)
5710 tgl 1067 ECB : {
5710 tgl 1068 UIC 0 : RESIZEBUF(in, 2);
5710 tgl 1069 LBC 0 : sprintf(in->cur, "( ");
5710 tgl 1070 UIC 0 : in->cur = strchr(in->cur, '\0');
5710 tgl 1071 ECB : }
2558 teodor 1072 CBC 186 : RESIZEBUF(in, 1);
2558 teodor 1073 GIC 186 : *(in->cur) = '!';
2558 teodor 1074 CBC 186 : in->cur++;
2558 teodor 1075 GIC 186 : *(in->cur) = '\0';
2558 teodor 1076 CBC 186 : in->curpol++;
5693 teodor 1077 ECB :
2477 teodor 1078 CBC 186 : infix(in, priority, false);
2558 teodor 1079 GIC 186 : if (priority < parentPriority)
5710 tgl 1080 ECB : {
5710 tgl 1081 LBC 0 : RESIZEBUF(in, 2);
1082 0 : sprintf(in->cur, " )");
5710 tgl 1083 UIC 0 : in->cur = strchr(in->cur, '\0');
5710 tgl 1084 ECB : }
1085 : }
1086 : else
1087 : {
5015 peter_e 1088 CBC 1341 : int8 op = in->curpol->qoperator.oper;
2477 teodor 1089 1341 : int priority = QO_PRIORITY(in->curpol);
2558 teodor 1090 GIC 1341 : int16 distance = in->curpol->qoperator.distance;
5710 tgl 1091 ECB : INFIX nrm;
2558 teodor 1092 GIC 1341 : bool needParenthesis = false;
5710 tgl 1093 ECB :
5710 tgl 1094 CBC 1341 : in->curpol++;
2558 teodor 1095 GIC 1341 : if (priority < parentPriority ||
2428 tgl 1096 ECB : /* phrase operator depends on order */
2428 tgl 1097 GIC 360 : (op == OP_PHRASE && rightPhraseOp))
5710 tgl 1098 ECB : {
2558 teodor 1099 CBC 166 : needParenthesis = true;
5710 tgl 1100 GIC 166 : RESIZEBUF(in, 2);
5710 tgl 1101 CBC 166 : sprintf(in->cur, "( ");
5710 tgl 1102 GIC 166 : in->cur = strchr(in->cur, '\0');
5710 tgl 1103 ECB : }
1104 :
5710 tgl 1105 GIC 1341 : nrm.curpol = in->curpol;
5710 tgl 1106 CBC 1341 : nrm.op = in->op;
5710 tgl 1107 GIC 1341 : nrm.buflen = 16;
5710 tgl 1108 CBC 1341 : nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
5710 tgl 1109 ECB :
1110 : /* get right operand */
2477 teodor 1111 GIC 1341 : infix(&nrm, priority, (op == OP_PHRASE));
5710 tgl 1112 ECB :
1113 : /* get & print left operand */
5710 tgl 1114 GIC 1341 : in->curpol = nrm.curpol;
2477 teodor 1115 CBC 1341 : infix(in, priority, false);
1116 :
5710 tgl 1117 ECB : /* print operator & right operand */
2495 rhaas 1118 GIC 1828 : RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
5624 bruce 1119 CBC 1341 : switch (op)
1120 : {
5693 teodor 1121 GBC 363 : case OP_OR:
1122 363 : sprintf(in->cur, " | %s", nrm.buf);
1123 363 : break;
5693 teodor 1124 GIC 612 : case OP_AND:
5693 teodor 1125 CBC 612 : sprintf(in->cur, " & %s", nrm.buf);
1126 612 : break;
2558 1127 366 : case OP_PHRASE:
1128 366 : if (distance != 1)
1129 87 : sprintf(in->cur, " <%d> %s", distance, nrm.buf);
1130 : else
1131 279 : sprintf(in->cur, " <-> %s", nrm.buf);
1132 366 : break;
5693 teodor 1133 UIC 0 : default:
5624 bruce 1134 EUB : /* OP_NOT is handled in above if-branch */
5611 tgl 1135 UBC 0 : elog(ERROR, "unrecognized operator type: %d", op);
5693 teodor 1136 EUB : }
5710 tgl 1137 GIC 1341 : in->cur = strchr(in->cur, '\0');
1138 1341 : pfree(nrm.buf);
1139 :
2558 teodor 1140 1341 : if (needParenthesis)
5710 tgl 1141 ECB : {
5710 tgl 1142 CBC 166 : RESIZEBUF(in, 2);
1143 166 : sprintf(in->cur, " )");
5710 tgl 1144 GIC 166 : in->cur = strchr(in->cur, '\0');
5710 tgl 1145 ECB : }
1146 : }
5710 tgl 1147 CBC 3623 : }
5710 tgl 1148 ECB :
1149 : Datum
5710 tgl 1150 CBC 770 : tsqueryout(PG_FUNCTION_ARGS)
1151 : {
1152 770 : TSQuery query = PG_GETARG_TSQUERY(0);
5710 tgl 1153 ECB : INFIX nrm;
1154 :
5710 tgl 1155 CBC 770 : if (query->size == 0)
1156 : {
5710 tgl 1157 GIC 15 : char *b = palloc(1);
5710 tgl 1158 ECB :
5710 tgl 1159 CBC 15 : *b = '\0';
1160 15 : PG_RETURN_POINTER(b);
5710 tgl 1161 ECB : }
5710 tgl 1162 GIC 755 : nrm.curpol = GETQUERY(query);
1163 755 : nrm.buflen = 32;
5710 tgl 1164 CBC 755 : nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
5710 tgl 1165 GIC 755 : *(nrm.cur) = '\0';
1166 755 : nrm.op = GETOPERAND(query);
2428 tgl 1167 CBC 755 : infix(&nrm, -1 /* lowest priority */ , false);
5710 tgl 1168 ECB :
5710 tgl 1169 GIC 755 : PG_FREE_IF_COPY(query, 0);
1170 755 : PG_RETURN_CSTRING(nrm.buf);
5710 tgl 1171 ECB : }
1172 :
1173 : /*
5693 teodor 1174 : * Binary Input / Output functions. The binary format is as follows:
1175 : *
1176 : * uint32 number of operators/operands in the query
5624 bruce 1177 : *
5693 teodor 1178 : * Followed by the operators and operands, in prefix notation. For each
1179 : * operand:
1180 : *
1181 : * uint8 type, QI_VAL
1182 : * uint8 weight
1183 : * operand text in client encoding, null-terminated
5441 tgl 1184 : * uint8 prefix
5693 teodor 1185 : *
5693 teodor 1186 EUB : * For each operator:
1187 : * uint8 type, QI_OPR
2558 1188 : * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT.
1189 : * uint16 distance (only for OP_PHRASE)
5693 teodor 1190 ECB : */
5710 tgl 1191 : Datum
5710 tgl 1192 UIC 0 : tsquerysend(PG_FUNCTION_ARGS)
5710 tgl 1193 ECB : {
5710 tgl 1194 UIC 0 : TSQuery query = PG_GETARG_TSQUERY(0);
5710 tgl 1195 ECB : StringInfoData buf;
1196 : int i;
5710 tgl 1197 LBC 0 : QueryItem *item = GETQUERY(query);
1198 :
5710 tgl 1199 UIC 0 : pq_begintypsend(&buf);
5710 tgl 1200 ECB :
2006 andres 1201 UIC 0 : pq_sendint32(&buf, query->size);
5710 tgl 1202 0 : for (i = 0; i < query->size; i++)
5710 tgl 1203 ECB : {
2006 andres 1204 UIC 0 : pq_sendint8(&buf, item->type);
5710 tgl 1205 ECB :
5624 bruce 1206 UIC 0 : switch (item->type)
1207 : {
5693 teodor 1208 LBC 0 : case QI_VAL:
2006 andres 1209 UIC 0 : pq_sendint8(&buf, item->qoperand.weight);
2006 andres 1210 LBC 0 : pq_sendint8(&buf, item->qoperand.prefix);
5015 peter_e 1211 UIC 0 : pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
5693 teodor 1212 LBC 0 : break;
1213 0 : case QI_OPR:
2006 andres 1214 UIC 0 : pq_sendint8(&buf, item->qoperator.oper);
2558 teodor 1215 LBC 0 : if (item->qoperator.oper == OP_PHRASE)
2006 andres 1216 0 : pq_sendint16(&buf, item->qoperator.distance);
5693 teodor 1217 0 : break;
1218 0 : default:
5611 tgl 1219 0 : elog(ERROR, "unrecognized tsquery node type: %d", item->type);
5693 teodor 1220 ECB : }
5710 tgl 1221 UIC 0 : item++;
5710 tgl 1222 ECB : }
1223 :
5710 tgl 1224 UIC 0 : PG_FREE_IF_COPY(query, 0);
1225 :
1226 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
1227 : }
1228 :
1229 : Datum
1230 0 : tsqueryrecv(PG_FUNCTION_ARGS)
1231 : {
2495 rhaas 1232 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
1233 : TSQuery query;
1234 : int i,
1235 : len;
1236 : QueryItem *item;
1237 : int datalen;
1238 : char *ptr;
1239 : uint32 size;
1240 : const char **operands;
1241 : bool needcleanup;
1242 :
5710 tgl 1243 0 : size = pq_getmsgint(buf, sizeof(uint32));
5693 teodor 1244 0 : if (size > (MaxAllocSize / sizeof(QueryItem)))
5710 tgl 1245 UBC 0 : elog(ERROR, "invalid size of tsquery");
1246 :
5693 teodor 1247 EUB : /* Allocate space to temporarily hold operand strings */
5693 teodor 1248 UIC 0 : operands = palloc(size * sizeof(char *));
1249 :
5693 teodor 1250 EUB : /* Allocate space for all the QueryItems. */
5693 teodor 1251 UIC 0 : len = HDRSIZETQ + sizeof(QueryItem) * size;
5693 teodor 1252 UBC 0 : query = (TSQuery) palloc0(len);
5710 tgl 1253 UIC 0 : query->size = size;
5710 tgl 1254 UBC 0 : item = GETQUERY(query);
5710 tgl 1255 EUB :
5693 teodor 1256 UIC 0 : datalen = 0;
5710 tgl 1257 UBC 0 : for (i = 0; i < size; i++)
1258 : {
1259 0 : item->type = (int8) pq_getmsgint(buf, sizeof(int8));
1260 :
5693 teodor 1261 0 : if (item->type == QI_VAL)
5710 tgl 1262 EUB : {
2118 1263 : size_t val_len; /* length after recoding to server
1264 : * encoding */
5624 bruce 1265 : uint8 weight;
5441 tgl 1266 : uint8 prefix;
5693 teodor 1267 : const char *val;
5624 bruce 1268 : pg_crc32 valcrc;
5693 teodor 1269 :
5624 bruce 1270 UBC 0 : weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
5441 tgl 1271 0 : prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
5693 teodor 1272 0 : val = pq_getmsgstring(buf);
5693 teodor 1273 UIC 0 : val_len = strlen(val);
5693 teodor 1274 EUB :
1275 : /* Sanity checks */
1276 :
5693 teodor 1277 UBC 0 : if (weight > 0xF)
5611 tgl 1278 UIC 0 : elog(ERROR, "invalid tsquery: invalid weight bitmap");
5693 teodor 1279 EUB :
5693 teodor 1280 UIC 0 : if (val_len > MAXSTRLEN)
5611 tgl 1281 0 : elog(ERROR, "invalid tsquery: operand too long");
1282 :
5693 teodor 1283 UBC 0 : if (datalen > MAXSTRPOS)
5611 tgl 1284 UIC 0 : elog(ERROR, "invalid tsquery: total operand length exceeded");
5693 teodor 1285 EUB :
1286 : /* Looks valid. */
1287 :
3078 heikki.linnakangas 1288 UIC 0 : INIT_LEGACY_CRC32(valcrc);
1289 0 : COMP_LEGACY_CRC32(valcrc, val, val_len);
1290 0 : FIN_LEGACY_CRC32(valcrc);
1291 :
5015 peter_e 1292 0 : item->qoperand.weight = weight;
1293 0 : item->qoperand.prefix = (prefix) ? true : false;
1294 0 : item->qoperand.valcrc = (int32) valcrc;
1295 0 : item->qoperand.length = val_len;
5015 peter_e 1296 UBC 0 : item->qoperand.distance = datalen;
5693 teodor 1297 EUB :
5624 bruce 1298 : /*
1299 : * Operand strings are copied to the final struct after this loop;
1300 : * here we just collect them to an array
5693 teodor 1301 : */
5693 teodor 1302 UIC 0 : operands[i] = val;
1303 :
2118 tgl 1304 UBC 0 : datalen += val_len + 1; /* + 1 for the '\0' terminator */
5624 bruce 1305 EUB : }
5693 teodor 1306 UBC 0 : else if (item->type == QI_OPR)
5693 teodor 1307 EUB : {
1308 : int8 oper;
5624 bruce 1309 :
5693 teodor 1310 UBC 0 : oper = (int8) pq_getmsgint(buf, sizeof(int8));
2558 teodor 1311 UIC 0 : if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
5611 tgl 1312 UBC 0 : elog(ERROR, "invalid tsquery: unrecognized operator type %d",
1313 : (int) oper);
5693 teodor 1314 0 : if (i == size - 1)
5693 teodor 1315 UIC 0 : elog(ERROR, "invalid pointer to right operand");
1316 :
5015 peter_e 1317 0 : item->qoperator.oper = oper;
2558 teodor 1318 0 : if (oper == OP_PHRASE)
1319 0 : item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
1320 : }
1321 : else
5611 tgl 1322 0 : elog(ERROR, "unrecognized tsquery node type: %d", item->type);
5710 tgl 1323 EUB :
5710 tgl 1324 UBC 0 : item++;
5710 tgl 1325 EUB : }
1326 :
1327 : /* Enlarge buffer to make room for the operand values. */
5710 tgl 1328 UIC 0 : query = (TSQuery) repalloc(query, len + datalen);
1329 0 : item = GETQUERY(query);
5710 tgl 1330 UBC 0 : ptr = GETOPERAND(query);
5693 teodor 1331 EUB :
1332 : /*
5624 bruce 1333 : * Fill in the left-pointers. Checks that the tree is well-formed as a
1334 : * side-effect.
1335 : */
2558 teodor 1336 UBC 0 : findoprnd(item, size, &needcleanup);
5693 teodor 1337 EUB :
1338 : /* Can't have found any QI_VALSTOP nodes */
2300 tgl 1339 UIC 0 : Assert(!needcleanup);
1340 :
5693 teodor 1341 EUB : /* Copy operands to output struct */
5710 tgl 1342 UBC 0 : for (i = 0; i < size; i++)
5710 tgl 1343 EUB : {
5693 teodor 1344 UIC 0 : if (item->type == QI_VAL)
5710 tgl 1345 EUB : {
5015 peter_e 1346 UBC 0 : memcpy(ptr, operands[i], item->qoperand.length + 1);
1347 0 : ptr += item->qoperand.length + 1;
5710 tgl 1348 EUB : }
5710 tgl 1349 UBC 0 : item++;
1350 : }
1351 :
5693 teodor 1352 UIC 0 : pfree(operands);
1353 :
5710 tgl 1354 0 : Assert(ptr - GETOPERAND(query) == datalen);
5710 tgl 1355 EUB :
5710 tgl 1356 UIC 0 : SET_VARSIZE(query, len + datalen);
5710 tgl 1357 EUB :
2558 teodor 1358 UIC 0 : PG_RETURN_TSQUERY(query);
5710 tgl 1359 EUB : }
1360 :
1361 : /*
1362 : * debug function, used only for view query
1363 : * which will be executed in non-leaf pages in index
1364 : */
1365 : Datum
5710 tgl 1366 UIC 0 : tsquerytree(PG_FUNCTION_ARGS)
5710 tgl 1367 EUB : {
5710 tgl 1368 UBC 0 : TSQuery query = PG_GETARG_TSQUERY(0);
1369 : INFIX nrm;
5710 tgl 1370 EUB : text *res;
1371 : QueryItem *q;
5693 teodor 1372 : int len;
1373 :
5710 tgl 1374 UIC 0 : if (query->size == 0)
5710 tgl 1375 EUB : {
5710 tgl 1376 UIC 0 : res = (text *) palloc(VARHDRSZ);
5710 tgl 1377 UBC 0 : SET_VARSIZE(res, VARHDRSZ);
5710 tgl 1378 UIC 0 : PG_RETURN_POINTER(res);
1379 : }
1380 :
5710 tgl 1381 UBC 0 : q = clean_NOT(GETQUERY(query), &len);
5710 tgl 1382 EUB :
5710 tgl 1383 UBC 0 : if (!q)
1384 : {
5493 tgl 1385 UIC 0 : res = cstring_to_text("T");
1386 : }
1387 : else
1388 : {
5710 tgl 1389 UBC 0 : nrm.curpol = q;
5710 tgl 1390 UIC 0 : nrm.buflen = 32;
1391 0 : nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
5710 tgl 1392 UBC 0 : *(nrm.cur) = '\0';
5710 tgl 1393 UIC 0 : nrm.op = GETOPERAND(query);
2477 teodor 1394 0 : infix(&nrm, -1, false);
5493 tgl 1395 UBC 0 : res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
5710 tgl 1396 UIC 0 : pfree(q);
5710 tgl 1397 EUB : }
1398 :
5710 tgl 1399 UBC 0 : PG_FREE_IF_COPY(query, 0);
5710 tgl 1400 EUB :
5493 tgl 1401 UIC 0 : PG_RETURN_TEXT_P(res);
5710 tgl 1402 EUB : }
|