TLA Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * tsquery.c
4 : * I/O functions for tsquery
5 : *
6 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
7 : *
8 : *
9 : * IDENTIFICATION
10 : * src/backend/utils/adt/tsquery.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : #include "postgres.h"
16 :
17 : #include "libpq/pqformat.h"
18 : #include "miscadmin.h"
19 : #include "nodes/miscnodes.h"
20 : #include "tsearch/ts_locale.h"
21 : #include "tsearch/ts_type.h"
22 : #include "tsearch/ts_utils.h"
23 : #include "utils/builtins.h"
24 : #include "utils/memutils.h"
25 : #include "utils/pg_crc.h"
26 : #include "varatt.h"
27 :
28 : /* FTS operator priorities, see ts_type.h */
29 : const int tsearch_op_priority[OP_COUNT] =
30 : {
31 : 4, /* OP_NOT */
32 : 2, /* OP_AND */
33 : 1, /* OP_OR */
34 : 3 /* OP_PHRASE */
35 : };
36 :
/*
 * What the tokenizer expects to find next in the input.
 */
typedef enum
{
	WAITOPERAND = 1,			/* an operand (or prefix operator) is due */
	WAITOPERATOR = 2,			/* an operand was just read; operator is due */
	WAITFIRSTOPERAND = 3		/* as WAITOPERAND, but nothing was read yet */
} ts_parserstate;
46 :
/*
 * Kinds of token a tokenizer can hand back to the parser.
 */
typedef enum
{
	PT_END = 0,					/* input exhausted */
	PT_ERR = 1,					/* error while tokenizing */
	PT_VAL = 2,					/* operand */
	PT_OPR = 3,					/* operator */
	PT_OPEN = 4,				/* opening parenthesis */
	PT_CLOSE = 5				/* closing parenthesis */
} ts_tokentype;
59 :
60 : /*
61 : * get token from query string
62 : *
63 : * All arguments except "state" are output arguments.
64 : *
65 : * If return value is PT_OPR, then *operator is filled with an OP_* code
66 : * and *weight will contain a distance value in case of phrase operator.
67 : *
68 : * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix
69 : * are filled.
70 : *
71 : * If PT_ERR is returned then a soft error has occurred. If state->escontext
72 : * isn't already filled then this should be reported as a generic parse error.
73 : */
74 : typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
75 : int *lenval, char **strval,
76 : int16 *weight, bool *prefix);
77 :
78 : struct TSQueryParserStateData
79 : {
80 : /* Tokenizer used for parsing tsquery */
81 : ts_tokenizer gettoken;
82 :
83 : /* State of tokenizer function */
84 : char *buffer; /* entire string we are scanning */
85 : char *buf; /* current scan point */
86 : int count; /* nesting count, incremented by (,
87 : * decremented by ) */
88 : ts_parserstate state;
89 :
90 : /* polish (prefix) notation in list, filled in by push* functions */
91 : List *polstr;
92 :
93 : /*
94 : * Strings from operands are collected in op. curop is a pointer to the
95 : * end of used space of op.
96 : */
97 : char *op;
98 : char *curop;
99 : int lenop; /* allocated size of op */
100 : int sumlen; /* used size of op */
101 :
102 : /* state for value's parser */
103 : TSVectorParseState valstate;
104 :
105 : /* context object for soft errors - must match valstate's escontext */
106 : Node *escontext;
107 : };
108 :
109 : /*
110 : * subroutine to parse the modifiers (weight and prefix flag currently)
111 : * part, like ':AB*' of a query.
112 : */
113 : static char *
114 GIC 3603 : get_modifiers(char *buf, int16 *weight, bool *prefix)
115 : {
116 3603 : *weight = 0;
117 3603 : *prefix = false;
118 :
119 3603 : if (!t_iseq(buf, ':'))
120 3285 : return buf;
121 :
122 318 : buf++;
123 744 : while (*buf && pg_mblen(buf) == 1)
124 : {
125 CBC 534 : switch (*buf)
126 : {
127 117 : case 'a':
128 ECB : case 'A':
129 GIC 117 : *weight |= 1 << 3;
130 CBC 117 : break;
131 33 : case 'b':
132 : case 'B':
133 33 : *weight |= 1 << 2;
134 33 : break;
135 GIC 57 : case 'c':
136 ECB : case 'C':
137 GIC 57 : *weight |= 1 << 1;
138 CBC 57 : break;
139 GIC 60 : case 'd':
140 ECB : case 'D':
141 CBC 60 : *weight |= 1;
142 60 : break;
143 GIC 159 : case '*':
144 CBC 159 : *prefix = true;
145 159 : break;
146 108 : default:
147 GIC 108 : return buf;
148 ECB : }
149 CBC 426 : buf++;
150 ECB : }
151 :
152 CBC 210 : return buf;
153 ECB : }
154 :
155 : /*
156 : * Parse phrase operator. The operator
157 : * may take the following forms:
158 : *
159 : * a <N> b (distance is exactly N lexemes)
160 : * a <-> b (default distance = 1)
161 : *
162 : * The buffer should begin with '<' char
163 : */
164 : static bool
165 GIC 4539 : parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
166 : {
167 : enum
168 : {
169 : PHRASE_OPEN = 0,
170 : PHRASE_DIST,
171 : PHRASE_CLOSE,
172 : PHRASE_FINISH
173 4539 : } state = PHRASE_OPEN;
174 4539 : char *ptr = pstate->buf;
175 : char *endptr;
176 CBC 4539 : long l = 1; /* default distance */
177 :
178 GIC 11682 : while (*ptr)
179 : {
180 5492 : switch (state)
181 : {
182 2888 : case PHRASE_OPEN:
183 2888 : if (t_iseq(ptr, '<'))
184 ECB : {
185 CBC 870 : state = PHRASE_DIST;
186 GIC 870 : ptr++;
187 ECB : }
188 : else
189 CBC 2018 : return false;
190 GIC 870 : break;
191 ECB :
192 GIC 870 : case PHRASE_DIST:
193 CBC 870 : if (t_iseq(ptr, '-'))
194 ECB : {
195 GIC 723 : state = PHRASE_CLOSE;
196 CBC 723 : ptr++;
197 723 : continue;
198 : }
199 :
200 147 : if (!t_isdigit(ptr))
201 LBC 0 : return false;
202 :
203 CBC 147 : errno = 0;
204 147 : l = strtol(ptr, &endptr, 10);
205 GIC 147 : if (ptr == endptr)
206 LBC 0 : return false;
207 CBC 147 : else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
208 GNC 3 : ereturn(pstate->escontext, false,
209 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
210 : errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
211 ECB : MAXENTRYPOS)));
212 EUB : else
213 : {
214 CBC 144 : state = PHRASE_CLOSE;
215 144 : ptr = endptr;
216 ECB : }
217 GBC 144 : break;
218 ECB :
219 CBC 867 : case PHRASE_CLOSE:
220 GIC 867 : if (t_iseq(ptr, '>'))
221 : {
222 867 : state = PHRASE_FINISH;
223 867 : ptr++;
224 : }
225 ECB : else
226 LBC 0 : return false;
227 GIC 867 : break;
228 ECB :
229 GIC 867 : case PHRASE_FINISH:
230 CBC 867 : *distance = (int16) l;
231 867 : pstate->buf = ptr;
232 GIC 867 : return true;
233 ECB : }
234 : }
235 :
236 GIC 1651 : return false;
237 EUB : }
238 ECB :
239 : /*
240 : * Parse OR operator used in websearch_to_tsquery(), returns true if we
241 : * believe that "OR" literal could be an operator OR
242 : */
243 : static bool
244 GIC 771 : parse_or_operator(TSQueryParserState pstate)
245 : {
246 771 : char *ptr = pstate->buf;
247 ECB :
248 : /* it should begin with "OR" literal */
249 GIC 771 : if (pg_strncasecmp(ptr, "or", 2) != 0)
250 699 : return false;
251 :
252 72 : ptr += 2;
253 :
254 : /*
255 ECB : * it shouldn't be a part of any word but somewhere later it should be
256 : * some operand
257 : */
258 GIC 72 : if (*ptr == '\0') /* no operand */
259 3 : return false;
260 ECB :
261 : /* it shouldn't be a part of any word */
262 GNC 69 : if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr))
263 CBC 12 : return false;
264 :
265 : for (;;)
266 : {
267 GIC 57 : ptr += pg_mblen(ptr);
268 :
269 CBC 57 : if (*ptr == '\0') /* got end of string without operand */
270 6 : return false;
271 :
272 : /*
273 ECB : * Suppose, we found an operand, but could be a not correct operand.
274 : * So we still treat OR literal as operation with possibly incorrect
275 : * operand and will not search it as lexeme
276 : */
277 GIC 51 : if (!t_isspace(ptr))
278 CBC 51 : break;
279 : }
280 ECB :
281 CBC 51 : pstate->buf += 2;
282 GIC 51 : return true;
283 : }
284 :
285 : static ts_tokentype
286 8745 : gettoken_query_standard(TSQueryParserState state, int8 *operator,
287 : int *lenval, char **strval,
288 ECB : int16 *weight, bool *prefix)
289 : {
290 GIC 8745 : *weight = 0;
291 8745 : *prefix = false;
292 ECB :
293 : while (true)
294 : {
295 GIC 11692 : switch (state->state)
296 : {
297 CBC 6077 : case WAITFIRSTOPERAND:
298 : case WAITOPERAND:
299 GIC 6077 : if (t_iseq(state->buf, '!'))
300 : {
301 CBC 465 : state->buf++;
302 465 : state->state = WAITOPERAND;
303 GIC 465 : *operator = OP_NOT;
304 465 : return PT_OPR;
305 : }
306 CBC 5612 : else if (t_iseq(state->buf, '('))
307 : {
308 531 : state->buf++;
309 GIC 531 : state->state = WAITOPERAND;
310 CBC 531 : state->count++;
311 GIC 531 : return PT_OPEN;
312 ECB : }
313 CBC 5081 : else if (t_iseq(state->buf, ':'))
314 ECB : {
315 : /* generic syntax error message is fine */
316 UNC 0 : return PT_ERR;
317 ECB : }
318 CBC 5081 : else if (!t_isspace(state->buf))
319 ECB : {
320 : /*
321 : * We rely on the tsvector parser to parse the value for
322 : * us
323 : */
324 GIC 3615 : reset_tsvector_parser(state->valstate, state->buf);
325 GBC 3615 : if (gettoken_tsvector(state->valstate, strval, lenval,
326 : NULL, NULL, &state->buf))
327 ECB : {
328 GIC 3603 : state->buf = get_modifiers(state->buf, weight, prefix);
329 3603 : state->state = WAITOPERATOR;
330 3603 : return PT_VAL;
331 : }
332 GNC 12 : else if (SOFT_ERROR_OCCURRED(state->escontext))
333 : {
334 : /* gettoken_tsvector reported a soft error */
335 UNC 0 : return PT_ERR;
336 : }
337 GIC 12 : else if (state->state == WAITFIRSTOPERAND)
338 ECB : {
339 CBC 12 : return PT_END;
340 : }
341 : else
342 UNC 0 : ereturn(state->escontext, PT_ERR,
343 ECB : (errcode(ERRCODE_SYNTAX_ERROR),
344 : errmsg("no operand in tsquery: \"%s\"",
345 : state->buffer)));
346 : }
347 GIC 1466 : break;
348 :
349 GBC 5615 : case WAITOPERATOR:
350 GIC 5615 : if (t_iseq(state->buf, '&'))
351 ECB : {
352 GIC 665 : state->buf++;
353 CBC 665 : state->state = WAITOPERAND;
354 GIC 665 : *operator = OP_AND;
355 665 : return PT_OPR;
356 EUB : }
357 GIC 4950 : else if (t_iseq(state->buf, '|'))
358 : {
359 411 : state->buf++;
360 411 : state->state = WAITOPERAND;
361 CBC 411 : *operator = OP_OR;
362 GIC 411 : return PT_OPR;
363 ECB : }
364 CBC 4539 : else if (parse_phrase_operator(state, weight))
365 : {
366 ECB : /* weight var is used as storage for distance */
367 CBC 867 : state->state = WAITOPERAND;
368 867 : *operator = OP_PHRASE;
369 867 : return PT_OPR;
370 : }
371 GNC 3672 : else if (SOFT_ERROR_OCCURRED(state->escontext))
372 : {
373 : /* parse_phrase_operator reported a soft error */
374 3 : return PT_ERR;
375 : }
376 CBC 3669 : else if (t_iseq(state->buf, ')'))
377 : {
378 531 : state->buf++;
379 531 : state->count--;
380 531 : return (state->count < 0) ? PT_ERR : PT_CLOSE;
381 ECB : }
382 GIC 3138 : else if (*state->buf == '\0')
383 ECB : {
384 GIC 1651 : return (state->count) ? PT_ERR : PT_END;
385 : }
386 CBC 1487 : else if (!t_isspace(state->buf))
387 ECB : {
388 CBC 6 : return PT_ERR;
389 : }
390 1481 : break;
391 : }
392 :
393 2947 : state->buf += pg_mblen(state->buf);
394 : }
395 ECB : }
396 :
397 : static ts_tokentype
398 CBC 1116 : gettoken_query_websearch(TSQueryParserState state, int8 *operator,
399 ECB : int *lenval, char **strval,
400 : int16 *weight, bool *prefix)
401 : {
402 GIC 1116 : *weight = 0;
403 CBC 1116 : *prefix = false;
404 :
405 ECB : while (true)
406 : {
407 CBC 1539 : switch (state->state)
408 : {
409 723 : case WAITFIRSTOPERAND:
410 : case WAITOPERAND:
411 GIC 723 : if (t_iseq(state->buf, '-'))
412 ECB : {
413 GIC 33 : state->buf++;
414 33 : state->state = WAITOPERAND;
415 :
416 33 : *operator = OP_NOT;
417 CBC 33 : return PT_OPR;
418 : }
419 GIC 690 : else if (t_iseq(state->buf, '"'))
420 : {
421 ECB : /* Everything in quotes is processed as a single token */
422 :
423 : /* skip opening quote */
424 GIC 96 : state->buf++;
425 96 : *strval = state->buf;
426 ECB :
427 : /* iterate to the closing quote or end of the string */
428 CBC 870 : while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
429 GIC 774 : state->buf++;
430 CBC 96 : *lenval = state->buf - *strval;
431 :
432 ECB : /* skip closing quote if not end of the string */
433 CBC 96 : if (*state->buf != '\0')
434 GIC 84 : state->buf++;
435 ECB :
436 CBC 96 : state->state = WAITOPERATOR;
437 GIC 96 : state->count++;
438 CBC 96 : return PT_VAL;
439 : }
440 GIC 594 : else if (ISOPERATOR(state->buf))
441 : {
442 : /* or else gettoken_tsvector() will raise an error */
443 CBC 96 : state->buf++;
444 96 : state->state = WAITOPERAND;
445 GIC 96 : continue;
446 : }
447 CBC 498 : else if (!t_isspace(state->buf))
448 ECB : {
449 : /*
450 : * We rely on the tsvector parser to parse the value for
451 : * us
452 : */
453 CBC 450 : reset_tsvector_parser(state->valstate, state->buf);
454 GIC 450 : if (gettoken_tsvector(state->valstate, strval, lenval,
455 ECB : NULL, NULL, &state->buf))
456 : {
457 CBC 441 : state->state = WAITOPERATOR;
458 GIC 441 : return PT_VAL;
459 ECB : }
460 GNC 9 : else if (SOFT_ERROR_OCCURRED(state->escontext))
461 : {
462 : /* gettoken_tsvector reported a soft error */
463 UNC 0 : return PT_ERR;
464 : }
465 GIC 9 : else if (state->state == WAITFIRSTOPERAND)
466 : {
467 LBC 0 : return PT_END;
468 ECB : }
469 : else
470 : {
471 : /* finally, we have to provide an operand */
472 GIC 9 : pushStop(state);
473 9 : return PT_END;
474 : }
475 : }
476 48 : break;
477 ECB :
478 CBC 816 : case WAITOPERATOR:
479 GIC 816 : if (t_iseq(state->buf, '"'))
480 : {
481 ECB : /*
482 : * put implicit AND after an operand and handle this quote
483 : * in WAITOPERAND
484 : */
485 GIC 45 : state->state = WAITOPERAND;
486 45 : *operator = OP_AND;
487 GBC 45 : return PT_OPR;
488 : }
489 CBC 771 : else if (parse_or_operator(state))
490 : {
491 GBC 51 : state->state = WAITOPERAND;
492 GIC 51 : *operator = OP_OR;
493 51 : return PT_OPR;
494 : }
495 720 : else if (*state->buf == '\0')
496 ECB : {
497 CBC 195 : return PT_END;
498 : }
499 GIC 525 : else if (!t_isspace(state->buf))
500 ECB : {
501 : /* put implicit AND after an operand */
502 CBC 246 : *operator = OP_AND;
503 246 : state->state = WAITOPERAND;
504 GIC 246 : return PT_OPR;
505 : }
506 279 : break;
507 : }
508 :
509 CBC 327 : state->buf += pg_mblen(state->buf);
510 ECB : }
511 : }
512 :
513 : static ts_tokentype
514 GIC 108 : gettoken_query_plain(TSQueryParserState state, int8 *operator,
515 ECB : int *lenval, char **strval,
516 : int16 *weight, bool *prefix)
517 : {
518 GIC 108 : *weight = 0;
519 CBC 108 : *prefix = false;
520 :
521 108 : if (*state->buf == '\0')
522 GIC 54 : return PT_END;
523 ECB :
524 GIC 54 : *strval = state->buf;
525 54 : *lenval = strlen(state->buf);
526 CBC 54 : state->buf += *lenval;
527 54 : state->count++;
528 54 : return PT_VAL;
529 : }
530 ECB :
531 : /*
532 : * Push an operator to state->polstr
533 : */
534 : void
535 GIC 3119 : pushOperator(TSQueryParserState state, int8 oper, int16 distance)
536 : {
537 : QueryOperator *tmp;
538 ECB :
539 GIC 3119 : Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
540 :
541 3119 : tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
542 CBC 3119 : tmp->type = QI_OPR;
543 3119 : tmp->oper = oper;
544 GIC 3119 : tmp->distance = (oper == OP_PHRASE) ? distance : 0;
545 ECB : /* left is filled in later with findoprnd */
546 :
547 GIC 3119 : state->polstr = lcons(tmp, state->polstr);
548 CBC 3119 : }
549 ECB :
550 : static void
551 CBC 4197 : pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
552 ECB : {
553 : QueryOperand *tmp;
554 :
555 GIC 4197 : if (distance >= MAXSTRPOS)
556 UNC 0 : ereturn(state->escontext,,
557 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
558 : errmsg("value is too big in tsquery: \"%s\"",
559 ECB : state->buffer)));
560 GIC 4197 : if (lenval >= MAXSTRLEN)
561 UNC 0 : ereturn(state->escontext,,
562 : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
563 ECB : errmsg("operand is too long in tsquery: \"%s\"",
564 : state->buffer)));
565 :
566 CBC 4197 : tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
567 4197 : tmp->type = QI_VAL;
568 4197 : tmp->weight = weight;
569 GIC 4197 : tmp->prefix = prefix;
570 4197 : tmp->valcrc = (int32) valcrc;
571 CBC 4197 : tmp->length = lenval;
572 4197 : tmp->distance = distance;
573 :
574 GIC 4197 : state->polstr = lcons(tmp, state->polstr);
575 ECB : }
576 :
577 : /*
578 : * Push an operand to state->polstr.
579 : *
580 EUB : * strval must point to a string equal to state->curop. lenval is the length
581 : * of the string.
582 : */
583 : void
584 CBC 4197 : pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
585 EUB : {
586 : pg_crc32 valcrc;
587 :
588 GIC 4197 : if (lenval >= MAXSTRLEN)
589 UNC 0 : ereturn(state->escontext,,
590 ECB : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
591 : errmsg("word is too long in tsquery: \"%s\"",
592 : state->buffer)));
593 :
594 CBC 4197 : INIT_LEGACY_CRC32(valcrc);
595 14802 : COMP_LEGACY_CRC32(valcrc, strval, lenval);
596 4197 : FIN_LEGACY_CRC32(valcrc);
597 GIC 4197 : pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
598 ECB :
599 : /* append the value string to state.op, enlarging buffer if needed first */
600 GIC 4197 : while (state->curop - state->op + lenval + 1 >= state->lenop)
601 : {
602 UIC 0 : int used = state->curop - state->op;
603 :
604 0 : state->lenop *= 2;
605 UNC 0 : state->op = (char *) repalloc(state->op, state->lenop);
606 UIC 0 : state->curop = state->op + used;
607 : }
608 GNC 4197 : memcpy(state->curop, strval, lenval);
609 GIC 4197 : state->curop += lenval;
610 4197 : *(state->curop) = '\0';
611 4197 : state->curop++;
612 CBC 4197 : state->sumlen += lenval + 1 /* \0 */ ;
613 EUB : }
614 :
615 :
616 : /*
617 : * Push a stopword placeholder to state->polstr
618 ECB : */
619 : void
620 CBC 342 : pushStop(TSQueryParserState state)
621 ECB : {
622 : QueryOperand *tmp;
623 :
624 CBC 342 : tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
625 GIC 342 : tmp->type = QI_VALSTOP;
626 EUB :
627 GIC 342 : state->polstr = lcons(tmp, state->polstr);
628 GBC 342 : }
629 EUB :
630 :
631 : #define STACKDEPTH 32
632 ECB :
633 : typedef struct OperatorElement
634 : {
635 : int8 op;
636 : int16 distance;
637 : } OperatorElement;
638 :
639 : static void
640 GIC 2783 : pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
641 : {
642 2783 : if (*lenstack == STACKDEPTH) /* internal error */
643 UIC 0 : elog(ERROR, "tsquery stack too small");
644 ECB :
645 GIC 2783 : stack[*lenstack].op = op;
646 2783 : stack[*lenstack].distance = distance;
647 :
648 CBC 2783 : (*lenstack)++;
649 2783 : }
650 :
651 ECB : static void
652 CBC 5235 : cleanOpStack(TSQueryParserState state,
653 : OperatorElement *stack, int *lenstack, int8 op)
654 : {
655 GIC 5235 : int opPriority = OP_PRIORITY(op);
656 :
657 8018 : while (*lenstack)
658 : {
659 : /* NOT is right associative unlike to others */
660 3029 : if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
661 159 : (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
662 : break;
663 :
664 CBC 2783 : (*lenstack)--;
665 GIC 2783 : pushOperator(state, stack[*lenstack].op,
666 CBC 2783 : stack[*lenstack].distance);
667 EUB : }
668 GIC 5235 : }
669 ECB :
670 : /*
671 : * Make polish (prefix) notation of query.
672 : *
673 : * See parse_tsquery for explanation of pushval.
674 : */
675 : static void
676 CBC 2461 : makepol(TSQueryParserState state,
677 : PushFunction pushval,
678 : Datum opaque)
679 ECB : {
680 GIC 2461 : int8 operator = 0;
681 ECB : ts_tokentype type;
682 GIC 2461 : int lenval = 0;
683 2461 : char *strval = NULL;
684 ECB : OperatorElement opstack[STACKDEPTH];
685 CBC 2461 : int lenstack = 0;
686 GIC 2461 : int16 weight = 0;
687 : bool prefix;
688 ECB :
689 : /* since this function recurses, it could be driven to stack overflow */
690 CBC 2461 : check_stack_depth();
691 :
692 9969 : while ((type = state->gettoken(state, &operator,
693 : &lenval, &strval,
694 GIC 9969 : &weight, &prefix)) != PT_END)
695 : {
696 8048 : switch (type)
697 : {
698 4194 : case PT_VAL:
699 4194 : pushval(opaque, state, strval, lenval, weight, prefix);
700 CBC 4194 : break;
701 GIC 2783 : case PT_OPR:
702 2783 : cleanOpStack(state, opstack, &lenstack, operator);
703 2783 : pushOpStack(opstack, &lenstack, operator, weight);
704 CBC 2783 : break;
705 GIC 531 : case PT_OPEN:
706 CBC 531 : makepol(state, pushval, opaque);
707 531 : break;
708 GIC 531 : case PT_CLOSE:
709 CBC 531 : cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
710 540 : return;
711 GIC 9 : case PT_ERR:
712 : default:
713 : /* don't overwrite a soft error saved by gettoken function */
714 GNC 9 : if (!SOFT_ERROR_OCCURRED(state->escontext))
715 6 : errsave(state->escontext,
716 : (errcode(ERRCODE_SYNTAX_ERROR),
717 : errmsg("syntax error in tsquery: \"%s\"",
718 : state->buffer)));
719 9 : return;
720 : }
721 : /* detect soft error in pushval or recursion */
722 7508 : if (SOFT_ERROR_OCCURRED(state->escontext))
723 UNC 0 : return;
724 ECB : }
725 :
726 CBC 1921 : cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
727 : }
728 ECB :
729 : static void
730 CBC 7649 : findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
731 ECB : {
732 : /* since this function recurses, it could be driven to stack overflow. */
733 CBC 7649 : check_stack_depth();
734 ECB :
735 CBC 7649 : if (*pos >= nnodes)
736 LBC 0 : elog(ERROR, "malformed tsquery: operand not found");
737 ECB :
738 CBC 7649 : if (ptr[*pos].type == QI_VAL)
739 ECB : {
740 CBC 4188 : (*pos)++;
741 ECB : }
742 GIC 3461 : else if (ptr[*pos].type == QI_VALSTOP)
743 : {
744 CBC 342 : *needcleanup = true; /* we'll have to remove stop words */
745 342 : (*pos)++;
746 : }
747 : else
748 : {
749 3119 : Assert(ptr[*pos].type == QI_OPR);
750 :
751 GIC 3119 : if (ptr[*pos].qoperator.oper == OP_NOT)
752 ECB : {
753 GBC 498 : ptr[*pos].qoperator.left = 1; /* fixed offset */
754 GIC 498 : (*pos)++;
755 :
756 ECB : /* process the only argument */
757 GIC 498 : findoprnd_recurse(ptr, pos, nnodes, needcleanup);
758 : }
759 : else
760 ECB : {
761 GIC 2621 : QueryOperator *curitem = &ptr[*pos].qoperator;
762 2621 : int tmp = *pos; /* save current position */
763 ECB :
764 GIC 2621 : Assert(curitem->oper == OP_AND ||
765 ECB : curitem->oper == OP_OR ||
766 EUB : curitem->oper == OP_PHRASE);
767 :
768 CBC 2621 : (*pos)++;
769 :
770 ECB : /* process RIGHT argument */
771 GIC 2621 : findoprnd_recurse(ptr, pos, nnodes, needcleanup);
772 ECB :
773 GIC 2621 : curitem->left = *pos - tmp; /* set LEFT arg's offset */
774 ECB :
775 : /* process LEFT argument */
776 GIC 2621 : findoprnd_recurse(ptr, pos, nnodes, needcleanup);
777 : }
778 : }
779 CBC 7649 : }
780 :
781 ECB :
782 : /*
783 : * Fill in the left-fields previously left unfilled.
784 : * The input QueryItems must be in polish (prefix) notation.
785 : * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
786 : */
787 : static void
788 GIC 1909 : findoprnd(QueryItem *ptr, int size, bool *needcleanup)
789 : {
790 : uint32 pos;
791 ECB :
792 CBC 1909 : *needcleanup = false;
793 GIC 1909 : pos = 0;
794 CBC 1909 : findoprnd_recurse(ptr, &pos, size, needcleanup);
795 :
796 GIC 1909 : if (pos != size)
797 UIC 0 : elog(ERROR, "malformed tsquery: extra nodes");
798 CBC 1909 : }
799 :
800 :
801 ECB : /*
802 : * Parse the tsquery stored in "buf".
803 : *
804 : * Each value (operand) in the query is passed to pushval. pushval can
805 : * transform the simple value to an arbitrarily complex expression using
806 : * pushValue and pushOperator. It must push a single value with pushValue,
807 : * a complete expression with all operands, or a stopword placeholder
808 : * with pushStop, otherwise the prefix notation representation will be broken,
809 : * having an operator with no operand.
810 : *
811 : * opaque is passed on to pushval as is, pushval can use it to store its
812 : * private state.
813 : *
814 : * The pushval function can record soft errors via escontext.
815 : * Callers must check SOFT_ERROR_OCCURRED to detect that.
816 : *
817 : * A bitmask of flags (see ts_utils.h) and an error context object
818 : * can be provided as well. If a soft error occurs, NULL is returned.
819 : */
820 : TSQuery
821 GIC 1930 : parse_tsquery(char *buf,
822 : PushFunction pushval,
823 : Datum opaque,
824 : int flags,
825 : Node *escontext)
826 : {
827 ECB : struct TSQueryParserStateData state;
828 : int i;
829 : TSQuery query;
830 : int commonlen;
831 : QueryItem *ptr;
832 : ListCell *cell;
833 : bool noisy;
834 : bool needcleanup;
835 GIC 1930 : int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
836 ECB :
837 EUB : /* plain should not be used with web */
838 CBC 1930 : Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
839 :
840 : /* select suitable tokenizer */
841 GIC 1930 : if (flags & P_TSQ_PLAIN)
842 54 : state.gettoken = gettoken_query_plain;
843 1876 : else if (flags & P_TSQ_WEB)
844 : {
845 204 : state.gettoken = gettoken_query_websearch;
846 204 : tsv_flags |= P_TSV_IS_WEB;
847 : }
848 : else
849 1672 : state.gettoken = gettoken_query_standard;
850 :
851 : /* emit nuisance NOTICEs only if not doing soft errors */
852 GNC 1930 : noisy = !(escontext && IsA(escontext, ErrorSaveContext));
853 :
854 : /* init state */
855 GIC 1930 : state.buffer = buf;
856 1930 : state.buf = buf;
857 1930 : state.count = 0;
858 1930 : state.state = WAITFIRSTOPERAND;
859 1930 : state.polstr = NIL;
860 GNC 1930 : state.escontext = escontext;
861 :
862 : /* init value parser's state */
863 1930 : state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext);
864 :
865 ECB : /* init list of operand */
866 GIC 1930 : state.sumlen = 0;
867 1930 : state.lenop = 64;
868 1930 : state.curop = state.op = (char *) palloc(state.lenop);
869 1930 : *(state.curop) = '\0';
870 :
871 : /* parse query & make polish notation (postfix, but in reverse order) */
872 1930 : makepol(&state, pushval, opaque);
873 :
874 1930 : close_tsvector_parser(state.valstate);
875 :
876 GNC 1930 : if (SOFT_ERROR_OCCURRED(escontext))
877 9 : return NULL;
878 :
879 1921 : if (state.polstr == NIL)
880 : {
881 12 : if (noisy)
882 12 : ereport(NOTICE,
883 : (errmsg("text-search query doesn't contain lexemes: \"%s\"",
884 : state.buffer)));
885 GIC 12 : query = (TSQuery) palloc(HDRSIZETQ);
886 CBC 12 : SET_VARSIZE(query, HDRSIZETQ);
887 GIC 12 : query->size = 0;
888 12 : return query;
889 ECB : }
890 :
891 CBC 1909 : if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
892 UNC 0 : ereturn(escontext, NULL,
893 ECB : (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
894 : errmsg("tsquery is too large")));
895 GIC 1909 : commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
896 :
897 ECB : /* Pack the QueryItems in the final TSQuery struct to return to caller */
898 GIC 1909 : query = (TSQuery) palloc0(commonlen);
899 1909 : SET_VARSIZE(query, commonlen);
900 CBC 1909 : query->size = list_length(state.polstr);
901 GIC 1909 : ptr = GETQUERY(query);
902 :
903 ECB : /* Copy QueryItems to TSQuery */
904 CBC 1909 : i = 0;
905 9558 : foreach(cell, state.polstr)
906 ECB : {
907 CBC 7649 : QueryItem *item = (QueryItem *) lfirst(cell);
908 ECB :
909 GIC 7649 : switch (item->type)
910 : {
911 CBC 4188 : case QI_VAL:
912 GIC 4188 : memcpy(&ptr[i], item, sizeof(QueryOperand));
913 4188 : break;
914 CBC 342 : case QI_VALSTOP:
915 342 : ptr[i].type = QI_VALSTOP;
916 342 : break;
917 3119 : case QI_OPR:
918 GIC 3119 : memcpy(&ptr[i], item, sizeof(QueryOperator));
919 3119 : break;
920 LBC 0 : default:
921 UIC 0 : elog(ERROR, "unrecognized QueryItem type: %d", item->type);
922 ECB : }
923 GIC 7649 : i++;
924 ECB : }
925 :
926 : /* Copy all the operand strings to TSQuery */
927 GNC 1909 : memcpy(GETOPERAND(query), state.op, state.sumlen);
928 GIC 1909 : pfree(state.op);
929 ECB :
930 : /*
931 : * Set left operand pointers for every operator. While we're at it,
932 : * detect whether there are any QI_VALSTOP nodes.
933 : */
934 CBC 1909 : findoprnd(ptr, query->size, &needcleanup);
935 ECB :
936 : /*
937 : * If there are QI_VALSTOP nodes, delete them and simplify the tree.
938 : */
939 CBC 1909 : if (needcleanup)
940 GNC 222 : query = cleanup_tsquery_stopwords(query, noisy);
941 :
942 GIC 1909 : return query;
943 ECB : }
944 :
945 : static void
946 CBC 2650 : pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
947 ECB : int16 weight, bool prefix)
948 : {
949 CBC 2650 : pushValue(state, strval, lenval, weight, prefix);
950 GIC 2650 : }
951 :
952 ECB : /*
953 : * in without morphology
954 : */
955 : Datum
956 GIC 1295 : tsqueryin(PG_FUNCTION_ARGS)
957 ECB : {
958 GIC 1295 : char *in = PG_GETARG_CSTRING(0);
959 GNC 1295 : Node *escontext = fcinfo->context;
960 ECB :
961 GNC 1295 : PG_RETURN_TSQUERY(parse_tsquery(in,
962 : pushval_asis,
963 : PointerGetDatum(NULL),
964 : 0,
965 : escontext));
966 ECB : }
967 :
968 : /*
969 : * out function
970 : */
971 : typedef struct
972 : {
973 EUB : QueryItem *curpol;
974 : char *buf;
975 : char *cur;
976 ECB : char *op;
977 : int buflen;
978 : } INFIX;
979 :
/*
 * Makes sure inf->buf is large enough for adding 'addsize' bytes plus a
 * terminating NUL after inf->cur.  The buffer is doubled until that holds
 * and inf->cur is re-based onto the new allocation.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else.
 */
#define RESIZEBUF(inf, addsize) \
	do { \
		while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
		{ \
			int			len = (inf)->cur - (inf)->buf; \
			(inf)->buflen *= 2; \
			(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
			(inf)->cur = (inf)->buf + len; \
		} \
	} while (0)
989 :
990 : /*
991 : * recursively traverse the tree and
992 : * print it in infix (human-readable) form
993 : */
994 : static void
995 CBC 3623 : infix(INFIX *in, int parentPriority, bool rightPhraseOp)
996 : {
997 : /* since this function recurses, it could be driven to stack overflow. */
998 GIC 3623 : check_stack_depth();
999 ECB :
1000 GIC 3623 : if (in->curpol->type == QI_VAL)
1001 : {
1002 CBC 2096 : QueryOperand *curpol = &in->curpol->qoperand;
1003 2096 : char *op = in->op + curpol->distance;
1004 : int clen;
1005 :
1006 GIC 3420 : RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
1007 2096 : *(in->cur) = '\'';
1008 2096 : in->cur++;
1009 CBC 8128 : while (*op)
1010 : {
1011 6032 : if (t_iseq(op, '\''))
1012 ECB : {
1013 GIC 6 : *(in->cur) = '\'';
1014 CBC 6 : in->cur++;
1015 : }
1016 GIC 6026 : else if (t_iseq(op, '\\'))
1017 : {
1018 3 : *(in->cur) = '\\';
1019 3 : in->cur++;
1020 : }
1021 6032 : COPYCHAR(in->cur, op);
1022 :
1023 6032 : clen = pg_mblen(op);
1024 6032 : op += clen;
1025 6032 : in->cur += clen;
1026 : }
1027 2096 : *(in->cur) = '\'';
1028 2096 : in->cur++;
1029 2096 : if (curpol->weight || curpol->prefix)
1030 : {
1031 87 : *(in->cur) = ':';
1032 87 : in->cur++;
1033 87 : if (curpol->prefix)
1034 : {
1035 12 : *(in->cur) = '*';
1036 12 : in->cur++;
1037 : }
1038 87 : if (curpol->weight & (1 << 3))
1039 : {
1040 30 : *(in->cur) = 'A';
1041 30 : in->cur++;
1042 : }
1043 87 : if (curpol->weight & (1 << 2))
1044 : {
1045 48 : *(in->cur) = 'B';
1046 48 : in->cur++;
1047 : }
1048 CBC 87 : if (curpol->weight & (1 << 1))
1049 : {
1050 GIC 9 : *(in->cur) = 'C';
1051 CBC 9 : in->cur++;
1052 : }
1053 87 : if (curpol->weight & 1)
1054 : {
1055 3 : *(in->cur) = 'D';
1056 3 : in->cur++;
1057 : }
1058 : }
1059 2096 : *(in->cur) = '\0';
1060 2096 : in->curpol++;
1061 ECB : }
1062 CBC 1527 : else if (in->curpol->qoperator.oper == OP_NOT)
1063 : {
1064 186 : int priority = QO_PRIORITY(in->curpol);
1065 :
1066 186 : if (priority < parentPriority)
1067 ECB : {
1068 UIC 0 : RESIZEBUF(in, 2);
1069 LBC 0 : sprintf(in->cur, "( ");
1070 UIC 0 : in->cur = strchr(in->cur, '\0');
1071 ECB : }
1072 CBC 186 : RESIZEBUF(in, 1);
1073 GIC 186 : *(in->cur) = '!';
1074 CBC 186 : in->cur++;
1075 GIC 186 : *(in->cur) = '\0';
1076 CBC 186 : in->curpol++;
1077 ECB :
1078 CBC 186 : infix(in, priority, false);
1079 GIC 186 : if (priority < parentPriority)
1080 ECB : {
1081 LBC 0 : RESIZEBUF(in, 2);
1082 0 : sprintf(in->cur, " )");
1083 UIC 0 : in->cur = strchr(in->cur, '\0');
1084 ECB : }
1085 : }
1086 : else
1087 : {
1088 CBC 1341 : int8 op = in->curpol->qoperator.oper;
1089 1341 : int priority = QO_PRIORITY(in->curpol);
1090 GIC 1341 : int16 distance = in->curpol->qoperator.distance;
1091 ECB : INFIX nrm;
1092 GIC 1341 : bool needParenthesis = false;
1093 ECB :
1094 CBC 1341 : in->curpol++;
1095 GIC 1341 : if (priority < parentPriority ||
1096 ECB : /* phrase operator depends on order */
1097 GIC 360 : (op == OP_PHRASE && rightPhraseOp))
1098 ECB : {
1099 CBC 166 : needParenthesis = true;
1100 GIC 166 : RESIZEBUF(in, 2);
1101 CBC 166 : sprintf(in->cur, "( ");
1102 GIC 166 : in->cur = strchr(in->cur, '\0');
1103 ECB : }
1104 :
1105 GIC 1341 : nrm.curpol = in->curpol;
1106 CBC 1341 : nrm.op = in->op;
1107 GIC 1341 : nrm.buflen = 16;
1108 CBC 1341 : nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1109 ECB :
1110 : /* get right operand */
1111 GIC 1341 : infix(&nrm, priority, (op == OP_PHRASE));
1112 ECB :
1113 : /* get & print left operand */
1114 GIC 1341 : in->curpol = nrm.curpol;
1115 CBC 1341 : infix(in, priority, false);
1116 :
1117 ECB : /* print operator & right operand */
1118 GIC 1828 : RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
1119 CBC 1341 : switch (op)
1120 : {
1121 GBC 363 : case OP_OR:
1122 363 : sprintf(in->cur, " | %s", nrm.buf);
1123 363 : break;
1124 GIC 612 : case OP_AND:
1125 CBC 612 : sprintf(in->cur, " & %s", nrm.buf);
1126 612 : break;
1127 366 : case OP_PHRASE:
1128 366 : if (distance != 1)
1129 87 : sprintf(in->cur, " <%d> %s", distance, nrm.buf);
1130 : else
1131 279 : sprintf(in->cur, " <-> %s", nrm.buf);
1132 366 : break;
1133 UIC 0 : default:
1134 EUB : /* OP_NOT is handled in above if-branch */
1135 UBC 0 : elog(ERROR, "unrecognized operator type: %d", op);
1136 EUB : }
1137 GIC 1341 : in->cur = strchr(in->cur, '\0');
1138 1341 : pfree(nrm.buf);
1139 :
1140 1341 : if (needParenthesis)
1141 ECB : {
1142 CBC 166 : RESIZEBUF(in, 2);
1143 166 : sprintf(in->cur, " )");
1144 GIC 166 : in->cur = strchr(in->cur, '\0');
1145 ECB : }
1146 : }
1147 CBC 3623 : }
1148 ECB :
1149 : Datum
1150 CBC 770 : tsqueryout(PG_FUNCTION_ARGS)
1151 : {
1152 770 : TSQuery query = PG_GETARG_TSQUERY(0);
1153 ECB : INFIX nrm;
1154 :
1155 CBC 770 : if (query->size == 0)
1156 : {
1157 GIC 15 : char *b = palloc(1);
1158 ECB :
1159 CBC 15 : *b = '\0';
1160 15 : PG_RETURN_POINTER(b);
1161 ECB : }
1162 GIC 755 : nrm.curpol = GETQUERY(query);
1163 755 : nrm.buflen = 32;
1164 CBC 755 : nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1165 GIC 755 : *(nrm.cur) = '\0';
1166 755 : nrm.op = GETOPERAND(query);
1167 CBC 755 : infix(&nrm, -1 /* lowest priority */ , false);
1168 ECB :
1169 GIC 755 : PG_FREE_IF_COPY(query, 0);
1170 755 : PG_RETURN_CSTRING(nrm.buf);
1171 ECB : }
1172 :
1173 : /*
1174 : * Binary Input / Output functions. The binary format is as follows:
1175 : *
1176 : * uint32 number of operators/operands in the query
1177 : *
1178 : * Followed by the operators and operands, in prefix notation. For each
1179 : * operand:
1180 : *
1181 : * uint8 type, QI_VAL
1182 : * uint8 weight
1183 : * uint8 prefix
1184 : * operand text in client encoding, null-terminated
1185 : *
1186 EUB : * For each operator:
1187 : * uint8 type, QI_OPR
1188 : * uint8 operator, one of OP_AND, OP_PHRASE, OP_OR, OP_NOT.
1189 : * uint16 distance (only for OP_PHRASE)
1190 ECB : */
1191 : Datum
1192 UIC 0 : tsquerysend(PG_FUNCTION_ARGS)
1193 ECB : {
1194 UIC 0 : TSQuery query = PG_GETARG_TSQUERY(0);
1195 ECB : StringInfoData buf;
1196 : int i;
1197 LBC 0 : QueryItem *item = GETQUERY(query);
1198 :
1199 UIC 0 : pq_begintypsend(&buf);
1200 ECB :
1201 UIC 0 : pq_sendint32(&buf, query->size);
1202 0 : for (i = 0; i < query->size; i++)
1203 ECB : {
1204 UIC 0 : pq_sendint8(&buf, item->type);
1205 ECB :
1206 UIC 0 : switch (item->type)
1207 : {
1208 LBC 0 : case QI_VAL:
1209 UIC 0 : pq_sendint8(&buf, item->qoperand.weight);
1210 LBC 0 : pq_sendint8(&buf, item->qoperand.prefix);
1211 UIC 0 : pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
1212 LBC 0 : break;
1213 0 : case QI_OPR:
1214 UIC 0 : pq_sendint8(&buf, item->qoperator.oper);
1215 LBC 0 : if (item->qoperator.oper == OP_PHRASE)
1216 0 : pq_sendint16(&buf, item->qoperator.distance);
1217 0 : break;
1218 0 : default:
1219 0 : elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1220 ECB : }
1221 UIC 0 : item++;
1222 ECB : }
1223 :
1224 UIC 0 : PG_FREE_IF_COPY(query, 0);
1225 :
1226 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
1227 : }
1228 :
1229 : Datum
1230 0 : tsqueryrecv(PG_FUNCTION_ARGS)
1231 : {
1232 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
1233 : TSQuery query;
1234 : int i,
1235 : len;
1236 : QueryItem *item;
1237 : int datalen;
1238 : char *ptr;
1239 : uint32 size;
1240 : const char **operands;
1241 : bool needcleanup;
1242 :
1243 0 : size = pq_getmsgint(buf, sizeof(uint32));
1244 0 : if (size > (MaxAllocSize / sizeof(QueryItem)))
1245 UBC 0 : elog(ERROR, "invalid size of tsquery");
1246 :
1247 EUB : /* Allocate space to temporarily hold operand strings */
1248 UIC 0 : operands = palloc(size * sizeof(char *));
1249 :
1250 EUB : /* Allocate space for all the QueryItems. */
1251 UIC 0 : len = HDRSIZETQ + sizeof(QueryItem) * size;
1252 UBC 0 : query = (TSQuery) palloc0(len);
1253 UIC 0 : query->size = size;
1254 UBC 0 : item = GETQUERY(query);
1255 EUB :
1256 UIC 0 : datalen = 0;
1257 UBC 0 : for (i = 0; i < size; i++)
1258 : {
1259 0 : item->type = (int8) pq_getmsgint(buf, sizeof(int8));
1260 :
1261 0 : if (item->type == QI_VAL)
1262 EUB : {
1263 : size_t val_len; /* length after recoding to server
1264 : * encoding */
1265 : uint8 weight;
1266 : uint8 prefix;
1267 : const char *val;
1268 : pg_crc32 valcrc;
1269 :
1270 UBC 0 : weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
1271 0 : prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
1272 0 : val = pq_getmsgstring(buf);
1273 UIC 0 : val_len = strlen(val);
1274 EUB :
1275 : /* Sanity checks */
1276 :
1277 UBC 0 : if (weight > 0xF)
1278 UIC 0 : elog(ERROR, "invalid tsquery: invalid weight bitmap");
1279 EUB :
1280 UIC 0 : if (val_len > MAXSTRLEN)
1281 0 : elog(ERROR, "invalid tsquery: operand too long");
1282 :
1283 UBC 0 : if (datalen > MAXSTRPOS)
1284 UIC 0 : elog(ERROR, "invalid tsquery: total operand length exceeded");
1285 EUB :
1286 : /* Looks valid. */
1287 :
1288 UIC 0 : INIT_LEGACY_CRC32(valcrc);
1289 0 : COMP_LEGACY_CRC32(valcrc, val, val_len);
1290 0 : FIN_LEGACY_CRC32(valcrc);
1291 :
1292 0 : item->qoperand.weight = weight;
1293 0 : item->qoperand.prefix = (prefix) ? true : false;
1294 0 : item->qoperand.valcrc = (int32) valcrc;
1295 0 : item->qoperand.length = val_len;
1296 UBC 0 : item->qoperand.distance = datalen;
1297 EUB :
1298 : /*
1299 : * Operand strings are copied to the final struct after this loop;
1300 : * here we just collect them to an array
1301 : */
1302 UIC 0 : operands[i] = val;
1303 :
1304 UBC 0 : datalen += val_len + 1; /* + 1 for the '\0' terminator */
1305 EUB : }
1306 UBC 0 : else if (item->type == QI_OPR)
1307 EUB : {
1308 : int8 oper;
1309 :
1310 UBC 0 : oper = (int8) pq_getmsgint(buf, sizeof(int8));
1311 UIC 0 : if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
1312 UBC 0 : elog(ERROR, "invalid tsquery: unrecognized operator type %d",
1313 : (int) oper);
1314 0 : if (i == size - 1)
1315 UIC 0 : elog(ERROR, "invalid pointer to right operand");
1316 :
1317 0 : item->qoperator.oper = oper;
1318 0 : if (oper == OP_PHRASE)
1319 0 : item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
1320 : }
1321 : else
1322 0 : elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1323 EUB :
1324 UBC 0 : item++;
1325 EUB : }
1326 :
1327 : /* Enlarge buffer to make room for the operand values. */
1328 UIC 0 : query = (TSQuery) repalloc(query, len + datalen);
1329 0 : item = GETQUERY(query);
1330 UBC 0 : ptr = GETOPERAND(query);
1331 EUB :
1332 : /*
1333 : * Fill in the left-pointers. Checks that the tree is well-formed as a
1334 : * side-effect.
1335 : */
1336 UBC 0 : findoprnd(item, size, &needcleanup);
1337 EUB :
1338 : /* Can't have found any QI_VALSTOP nodes */
1339 UIC 0 : Assert(!needcleanup);
1340 :
1341 EUB : /* Copy operands to output struct */
1342 UBC 0 : for (i = 0; i < size; i++)
1343 EUB : {
1344 UIC 0 : if (item->type == QI_VAL)
1345 EUB : {
1346 UBC 0 : memcpy(ptr, operands[i], item->qoperand.length + 1);
1347 0 : ptr += item->qoperand.length + 1;
1348 EUB : }
1349 UBC 0 : item++;
1350 : }
1351 :
1352 UIC 0 : pfree(operands);
1353 :
1354 0 : Assert(ptr - GETOPERAND(query) == datalen);
1355 EUB :
1356 UIC 0 : SET_VARSIZE(query, len + datalen);
1357 EUB :
1358 UIC 0 : PG_RETURN_TSQUERY(query);
1359 EUB : }
1360 :
1361 : /*
1362 : * debug function, used only for view query
1363 : * which will be executed in non-leaf pages in index
1364 : */
1365 : Datum
1366 UIC 0 : tsquerytree(PG_FUNCTION_ARGS)
1367 EUB : {
1368 UBC 0 : TSQuery query = PG_GETARG_TSQUERY(0);
1369 : INFIX nrm;
1370 EUB : text *res;
1371 : QueryItem *q;
1372 : int len;
1373 :
1374 UIC 0 : if (query->size == 0)
1375 EUB : {
1376 UIC 0 : res = (text *) palloc(VARHDRSZ);
1377 UBC 0 : SET_VARSIZE(res, VARHDRSZ);
1378 UIC 0 : PG_RETURN_POINTER(res);
1379 : }
1380 :
1381 UBC 0 : q = clean_NOT(GETQUERY(query), &len);
1382 EUB :
1383 UBC 0 : if (!q)
1384 : {
1385 UIC 0 : res = cstring_to_text("T");
1386 : }
1387 : else
1388 : {
1389 UBC 0 : nrm.curpol = q;
1390 UIC 0 : nrm.buflen = 32;
1391 0 : nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1392 UBC 0 : *(nrm.cur) = '\0';
1393 UIC 0 : nrm.op = GETOPERAND(query);
1394 0 : infix(&nrm, -1, false);
1395 UBC 0 : res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
1396 UIC 0 : pfree(q);
1397 EUB : }
1398 :
1399 UBC 0 : PG_FREE_IF_COPY(query, 0);
1400 EUB :
1401 UIC 0 : PG_RETURN_TEXT_P(res);
1402 EUB : }
|