Age Owner TLA Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * psqlscan.l
5 : * lexical scanner for SQL commands
6 : *
7 : * This lexer used to be part of psql, and that heritage is reflected in
8 : * the file name as well as function and typedef names, though it can now
9 : * be used by other frontend programs as well. It's also possible to extend
10 : * this lexer with a compatible add-on lexer to handle program-specific
11 : * backslash commands.
12 : *
13 : * This code is mainly concerned with determining where the end of a SQL
14 : * statement is: we are looking for semicolons that are not within quotes,
15 : * comments, or parentheses. The most reliable way to handle this is to
16 : * borrow the backend's flex lexer rules, lock, stock, and barrel. The rules
17 : * below are (except for a few) the same as the backend's, but their actions
18 : * are just ECHO whereas the backend's actions generally do other things.
19 : *
20 : * XXX The rules in this file must be kept in sync with the backend lexer!!!
21 : *
22 : * XXX Avoid creating backtracking cases --- see the backend lexer for info.
23 : *
24 : * See psqlscan_int.h for additional commentary.
25 : *
26 : *
27 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
28 : * Portions Copyright (c) 1994, Regents of the University of California
29 : *
30 : * IDENTIFICATION
31 : * src/fe_utils/psqlscan.l
32 : *
33 : *-------------------------------------------------------------------------
34 : */
35 : #include "postgres_fe.h"
36 :
37 : #include "common/logging.h"
38 : #include "fe_utils/psqlscan.h"
39 :
40 : #include "libpq-fe.h"
41 : }
42 :
43 : %{
44 :
45 : /* LCOV_EXCL_START */
46 :
47 : #include "fe_utils/psqlscan_int.h"
48 :
49 : /*
50 : * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
51 : * doesn't presently make use of that argument, so just declare it as int.
52 : */
53 : typedef int YYSTYPE;
54 :
55 : /*
56 : * Set the type of yyextra; we use it as a pointer back to the containing
57 : * PsqlScanState.
58 : */
59 : #define YY_EXTRA_TYPE PsqlScanState
60 :
61 :
62 : /* Return values from yylex() */
63 : #define LEXRES_EOL 0 /* end of input */
64 : #define LEXRES_SEMI 1 /* command-terminating semicolon found */
65 : #define LEXRES_BACKSLASH 2 /* backslash command start */
66 :
67 :
68 : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
69 :
70 : /*
71 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
72 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
73 : * this would cause warnings. Providing our own declarations should be
74 : * harmless even when the bug gets fixed.
75 : */
76 : extern int psql_yyget_column(yyscan_t yyscanner);
77 : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
78 :
79 : %}
80 :
81 : %option reentrant
82 : %option bison-bridge
83 : %option 8bit
84 : %option never-interactive
85 : %option nodefault
86 : %option noinput
87 : %option nounput
88 : %option noyywrap
89 : %option warn
90 : %option prefix="psql_yy"
91 :
92 : /*
93 : * All of the following definitions and rules should exactly match
94 : * src/backend/parser/scan.l so far as the flex patterns are concerned.
95 : * The rule bodies are just ECHO as opposed to what the backend does,
96 : * however. (But be sure to duplicate code that affects the lexing process,
97 : * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
98 : * whereas scan.l has a separate one for each exclusive state.
99 : */
100 :
101 : /*
102 : * OK, here is a short description of lex/flex rules behavior.
103 : * The longest pattern which matches an input string is always chosen.
104 : * For equal-length patterns, the first occurring in the rules list is chosen.
105 : * INITIAL is the starting state, to which all non-conditional rules apply.
106 : * Exclusive states change parsing rules while the state is active. When in
107 : * an exclusive state, only those rules defined for that state apply.
108 : *
109 : * We use exclusive states for quoted strings, extended comments,
110 : * and to eliminate parsing troubles for numeric strings.
111 : * Exclusive states:
112 : * <xb> bit string literal
113 : * <xc> extended C-style comments
114 : * <xd> delimited identifiers (double-quoted identifiers)
115 : * <xh> hexadecimal byte string
116 : * <xq> standard quoted strings
117 : * <xqs> quote stop (detect continued strings)
118 : * <xe> extended quoted strings (support backslash escape sequences)
119 : * <xdolq> $foo$ quoted strings
120 : * <xui> quoted identifier with Unicode escapes
121 : * <xus> quoted string with Unicode escapes
122 : *
123 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
124 : * no need to distinguish it from <xe> state, and no good way to get out
125 : * of it in error cases. The backend just throws yyerror() in those
126 : * cases, but that's not an option here.
127 : */
128 :
129 : %x xb
130 : %x xc
131 : %x xd
132 : %x xh
133 : %x xq
134 : %x xqs
135 : %x xe
136 : %x xdolq
137 : %x xui
138 : %x xus
139 :
140 : /*
141 : * In order to make the world safe for Windows and Mac clients as well as
142 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
143 : * sequence will be seen as two successive newlines, but that doesn't cause
144 : * any problems. Comments that start with -- and extend to the next
145 : * newline are treated as equivalent to a single whitespace character.
146 : *
147 : * NOTE a fine point: if there is no newline following --, we will absorb
148 : * everything to the end of the input as a comment. This is correct. Older
149 : * versions of Postgres failed to recognize -- as a comment if the input
150 : * did not end with a newline.
151 : *
152 : * XXX perhaps \f (formfeed) should be treated as a newline as well?
153 : *
154 : * XXX if you change the set of whitespace characters, fix scanner_isspace()
155 : * to agree.
156 : */
157 :
158 : space [ \t\n\r\f]
159 : horiz_space [ \t\f]
160 : newline [\n\r]
161 : non_newline [^\n\r]
162 :
163 : comment ("--"{non_newline}*)
164 :
165 : whitespace ({space}+|{comment})
166 :
167 : /*
168 : * SQL requires at least one newline in the whitespace separating
169 : * string literals that are to be concatenated. Silly, but who are we
170 : * to argue? Note that {whitespace_with_newline} should not have * after
171 : * it, whereas {whitespace} should generally have a * after it...
172 : */
173 :
174 : special_whitespace ({space}+|{comment}{newline})
175 : horiz_whitespace ({horiz_space}|{comment})
176 : whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
177 :
178 : quote '
179 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
180 : quotecontinue {whitespace_with_newline}{quote}
181 :
182 : /*
183 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
184 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
185 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
186 : * to see if there's another dash --- which would start a {comment} and thus
187 : * allow continuation of the {quotecontinue} token.
188 : */
189 : quotecontinuefail {whitespace}*"-"?
190 :
191 : /* Bit string
192 : * It is tempting to scan the string for only those characters
193 : * which are allowed. However, this leads to silently swallowed
194 : * characters if illegal characters are included in the string.
195 : * For example, if xbinside is [01] then B'ABCD' is interpreted
196 : * as a zero-length string, and the ABCD' is lost!
197 : * Better to pass the string forward and let the input routines
198 : * validate the contents.
199 : */
200 : xbstart [bB]{quote}
201 : xbinside [^']*
202 :
203 : /* Hexadecimal byte string */
204 : xhstart [xX]{quote}
205 : xhinside [^']*
206 :
207 : /* National character */
208 : xnstart [nN]{quote}
209 :
210 : /* Quoted string that allows backslash escapes */
211 : xestart [eE]{quote}
212 : xeinside [^\\']+
213 : xeescape [\\][^0-7]
214 : xeoctesc [\\][0-7]{1,3}
215 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
216 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
217 : xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
218 :
219 : /* Extended quote
220 : * xqdouble implements embedded quote, ''''
221 : */
222 : xqstart {quote}
223 : xqdouble {quote}{quote}
224 : xqinside [^']+
225 :
226 : /* $foo$ style quotes ("dollar quoting")
227 : * The quoted string starts with $foo$ where "foo" is an optional string
228 : * in the form of an identifier, except that it may not contain "$",
229 : * and extends to the first occurrence of an identical string.
230 : * There is *no* processing of the quoted text.
231 : *
232 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
233 : * fails to match its trailing "$".
234 : */
235 : dolq_start [A-Za-z\200-\377_]
236 : dolq_cont [A-Za-z\200-\377_0-9]
237 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
238 : dolqfailed \${dolq_start}{dolq_cont}*
239 : dolqinside [^$]+
240 :
241 : /* Double quote
242 : * Allows embedded spaces and other special characters into identifiers.
243 : */
244 : dquote \"
245 : xdstart {dquote}
246 : xdstop {dquote}
247 : xddouble {dquote}{dquote}
248 : xdinside [^"]+
249 :
250 : /* Quoted identifier with Unicode escapes */
251 : xuistart [uU]&{dquote}
252 :
253 : /* Quoted string with Unicode escapes */
254 : xusstart [uU]&{quote}
255 :
256 : /* error rule to avoid backup */
257 : xufailed [uU]&
258 :
259 :
260 : /* C-style comments
261 : *
262 : * The "extended comment" syntax closely resembles allowable operator syntax.
263 : * The tricky part here is to get lex to recognize a string starting with
264 : * slash-star as a comment, when interpreting it as an operator would produce
265 : * a longer match --- remember lex will prefer a longer match! Also, if we
266 : * have something like plus-slash-star, lex will think this is a 3-character
267 : * operator whereas we want to see it as a + operator and a comment start.
268 : * The solution is two-fold:
269 : * 1. append {op_chars}* to xcstart so that it matches as much text as
270 : * {operator} would. Then the tie-breaker (first matching rule of same
271 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
272 : * in case it contains a star-slash that should terminate the comment.
273 : * 2. In the operator rule, check for slash-star within the operator, and
274 : * if found throw it back with yyless(). This handles the plus-slash-star
275 : * problem.
276 : * Dash-dash comments have similar interactions with the operator rule.
277 : */
278 : xcstart \/\*{op_chars}*
279 : xcstop \*+\/
280 : xcinside [^*/]+
281 :
282 : ident_start [A-Za-z\200-\377_]
283 : ident_cont [A-Za-z\200-\377_0-9\$]
284 :
285 : identifier {ident_start}{ident_cont}*
286 :
287 : /* Assorted special-case operators and operator-like tokens */
288 : typecast "::"
289 : dot_dot \.\.
290 : colon_equals ":="
291 :
292 : /*
293 : * These operator-like tokens (unlike the above ones) also match the {operator}
294 : * rule, which means that they might be overridden by a longer match if they
295 : * are followed by a comment start or a + or - character. Accordingly, if you
296 : * add to this list, you must also add corresponding code to the {operator}
297 : * block to return the correct token in such cases. (This is not needed in
298 : * psqlscan.l since the token value is ignored there.)
299 : */
300 : equals_greater "=>"
301 : less_equals "<="
302 : greater_equals ">="
303 : less_greater "<>"
304 : not_equals "!="
305 :
306 : /*
307 : * "self" is the set of chars that should be returned as single-character
308 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
309 : * which can be one or more characters long (but if a single-char token
310 : * appears in the "self" set, it is not to be returned as an Op). Note
311 : * that the sets overlap, but each has some chars that are not in the other.
312 : *
313 : * If you change either set, adjust the character lists appearing in the
314 : * rule for "operator"!
315 : */
316 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
317 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
318 : operator {op_chars}+
319 :
320 : /*
321 : * Numbers
322 : *
323 : * Unary minus is not part of a number here. Instead we pass it separately to
324 : * the parser, and there it gets coerced via doNegate().
325 : *
326 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
327 : *
328 : * {realfail} is added to prevent the need for scanner
329 : * backup when the {real} rule fails to match completely.
330 : */
331 : decdigit [0-9]
332 : hexdigit [0-9A-Fa-f]
333 : octdigit [0-7]
334 : bindigit [0-1]
335 :
336 : decinteger {decdigit}(_?{decdigit})*
337 : hexinteger 0[xX](_?{hexdigit})+
338 : octinteger 0[oO](_?{octdigit})+
339 : bininteger 0[bB](_?{bindigit})+
340 :
341 : hexfail 0[xX]_?
342 : octfail 0[oO]_?
343 : binfail 0[bB]_?
344 :
345 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
346 : numericfail {decdigit}+\.\.
347 :
348 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
349 : realfail ({decinteger}|{numeric})[Ee][-+]
350 :
351 : decinteger_junk {decinteger}{ident_start}
352 : hexinteger_junk {hexinteger}{ident_start}
353 : octinteger_junk {octinteger}{ident_start}
354 : bininteger_junk {bininteger}{ident_start}
355 : numeric_junk {numeric}{ident_start}
356 : real_junk {real}{ident_start}
357 :
358 : param \${decinteger}
359 : param_junk \${decinteger}{ident_start}
360 :
361 : /* psql-specific: characters allowed in variable names */
362 : variable_char [A-Za-z\200-\377_0-9]
363 :
364 : other .
365 :
366 : /*
367 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
368 : * Other quoted strings must allow some special characters such as single-quote
369 : * and newline.
370 : * Embedded single-quotes are implemented both in the SQL standard
371 : * style of two adjacent single quotes "''" and in the Postgres/Java style
372 : * of escaped-quote "\'".
373 : * Other embedded escaped characters are matched explicitly; the backend drops
374 : * the leading backslash from the string, but here the text is echoed unchanged.
375 : * Note that xcstart must appear before operator, as explained above!
376 : * Also whitespace (comment) must appear before operator.
377 : */
378 :
379 : %%
380 :
381 : %{
382 : /* Declare some local variables inside yylex(), for convenience */
383 : PsqlScanState cur_state = yyextra;
2578 tgl 384 GIC 425182 : PQExpBuffer output_buf = cur_state->output_buf;
385 425182 :
386 : /*
387 : * Force flex into the state indicated by start_state. This has a
388 : * couple of purposes: it lets some of the functions below set a new
389 : * starting state without ugly direct access to flex variables, and it
390 : * allows us to transition from one flex lexer to another so that we
391 : * can lex different parts of the source string using separate lexers.
392 : */
393 : BEGIN(cur_state->start_state);
394 425182 : %}
395 :
396 : {whitespace} {
397 : /*
398 : * Note that the whitespace rule includes both true
6989 tgl 399 ECB : * whitespace and single-line ("--" style) comments.
494 400 : * We suppress whitespace until we have collected some
401 : * non-whitespace data. (This interacts with some
402 : * decisions in MainLoop(); see there for details.)
403 : */
404 : if (output_buf->len > 0)
6989 tgl 405 GIC 1061569 : ECHO;
406 997482 : }
407 :
408 1061569 : {xcstart} {
    /*
     * Opening of a C-style comment seen outside any comment: the nesting
     * depth restarts at zero and scanning continues in the <xc> state.
     */
6989 tgl 409 CBC 306 : cur_state->xcdepth = 0;
6989 tgl 410 GIC 306 : BEGIN(xc);
411 306 : /* Put back any characters past slash-star; see above */
412 : yyless(2);
413 306 : ECHO;
414 306 : }
415 :
1608 416 306 : <xc>{
417 : {xcstart} {
    /* A comment opener inside a comment: go one nesting level deeper */
6989 418 9 : cur_state->xcdepth++;
419 9 : /* Put back any characters past slash-star; see above */
6989 tgl 420 ECB : yyless(2);
6989 tgl 421 CBC 9 : ECHO;
6989 tgl 422 GIC 9 : }
6989 tgl 423 ECB :
1608 tgl 424 CBC 9 : {xcstop} {
    /*
     * A comment terminator: at depth zero it closes the outermost
     * comment and scanning returns to INITIAL; otherwise it only
     * closes one nested level.  The terminator is echoed either way.
     */
6989 425 315 : if (cur_state->xcdepth <= 0)
426 315 : BEGIN(INITIAL);
6989 tgl 427 GIC 306 : else
6989 tgl 428 ECB : cur_state->xcdepth--;
6989 tgl 429 CBC 9 : ECHO;
6989 tgl 430 GIC 315 : }
6989 tgl 431 ECB :
1608 tgl 432 GIC 315 : {xcinside} {
    /* Comment text containing no star or slash is echoed as is */
6989 tgl 433 CBC 716 : ECHO;
434 716 : }
435 :
1608 436 716 : {op_chars} {
    /* One operator character that did not begin a longer match above */
6989 437 193 : ECHO;
6989 tgl 438 GIC 193 : }
6989 tgl 439 ECB :
1608 tgl 440 CBC 193 : \*+ {
    /* A run of stars that is not followed by a slash */
6527 tgl 441 LBC 0 : ECHO;
442 0 : }
443 : } /* <xc> */
444 0 :
6989 tgl 445 ECB : {xbstart} {
6989 tgl 446 GIC 369 : BEGIN(xb);
6989 tgl 447 CBC 369 : ECHO;
448 369 : }
6989 tgl 449 ECB : <xh>{xhinside} |
6989 tgl 450 GIC 369 : <xb>{xbinside} {
6989 tgl 451 CBC 1997 : ECHO;
452 1997 : }
6989 tgl 453 ECB :
6989 tgl 454 GIC 1997 : {xhstart} {
6989 tgl 455 CBC 1643 : /* Start of a hexadecimal byte string ([xX] followed by a quote).
6989 tgl 456 EUB : * Only the pattern needs to agree with the backend lexer; no
457 : * marker is attached to the string here.  The action just enters
458 : * the <xh> state and echoes the matched text, leaving the string
459 : * body to the <xh>{xhinside} rule.
460 : */
6989 tgl 461 ECB : BEGIN(xh);
6989 tgl 462 CBC 1643 : ECHO;
463 1643 : }
464 :
465 1643 : {xnstart} {
2577 tgl 466 LBC 0 : yyless(1); /* eat only 'n' this time */
6989 467 0 : ECHO;
6989 tgl 468 UIC 0 : }
6989 tgl 469 ECB :
6989 tgl 470 LBC 0 : {xqstart} {
    /*
     * A bare quote opens a string literal.  The std_strings flag of the
     * scan state picks the exclusive state: <xq> (standard quoted string)
     * when it is set, <xe> (backslash escapes recognized) when it is not.
     */
2578 tgl 471 GIC 89353 : if (cur_state->std_strings)
6243 bruce 472 89353 : BEGIN(xq);
473 89299 : else
474 : BEGIN(xe);
6989 tgl 475 54 : ECHO;
476 89353 : }
6496 tgl 477 ECB : {xestart} {
6243 bruce 478 CBC 89353 : BEGIN(xe);
6496 tgl 479 GIC 560 : ECHO;
6496 tgl 480 CBC 560 : }
5275 peter_e 481 EUB : {xusstart} {
5275 peter_e 482 GBC 560 : BEGIN(xus);
483 136 : ECHO;
5275 peter_e 484 GIC 136 : }
1182 tgl 485 EUB :
1182 tgl 486 CBC 136 : <xb,xh,xq,xe,xus>{quote} {
487 92061 : /*
1182 tgl 488 ECB : * When we are scanning a quoted string and see an end
489 : * quote, we must look ahead for a possible continuation.
490 : * If we don't see one, we know the end quote was in fact
491 : * the end of the string. To reduce the lexer table size,
492 : * we use a single "xqs" state to do the lookahead for all
493 : * types of strings.
494 : */
495 : cur_state->state_before_str_stop = YYSTATE;
1182 tgl 496 GIC 92061 : BEGIN(xqs);
3678 heikki.linnakangas 497 CBC 92061 : ECHO;
498 92061 : }
1182 tgl 499 ECB : <xqs>{quotecontinue} {
1182 tgl 500 GIC 92061 : /*
1182 tgl 501 LBC 0 : * Found a quote continuation, so return to the in-quote
1182 tgl 502 ECB : * state and continue scanning the literal. Nothing is
503 : * added to the literal's contents.
504 : */
505 : BEGIN(cur_state->state_before_str_stop);
3678 heikki.linnakangas 506 UIC 0 : ECHO;
507 0 : }
508 : <xqs>{quotecontinuefail} |
1182 tgl 509 0 : <xqs>{other} {
1182 tgl 510 GIC 91762 : /*
1182 tgl 511 ECB : * Failed to see a quote continuation. Throw back
512 : * everything after the end quote, and handle the string
513 : * according to the state we were in previously.
514 : */
3678 heikki.linnakangas 515 : yyless(0);
5275 peter_e 516 GBC 91762 : BEGIN(INITIAL);
1182 tgl 517 GIC 91762 : /* There's nothing to echo ... */
518 : }
519 :
5275 peter_e 520 91762 : <xq,xe,xus>{xqdouble} {
6989 tgl 521 GBC 3085 : ECHO;
522 3085 : }
523 : <xq,xus>{xqinside} {
524 3085 : ECHO;
6989 tgl 525 CBC 94455 : }
526 : <xe>{xeinside} {
6243 bruce 527 GIC 94455 : ECHO;
528 1309 : }
529 : <xe>{xeunicode} {
4942 tgl 530 1309 : ECHO;
4942 tgl 531 CBC 78 : }
4548 tgl 532 ECB : <xe>{xeunicodefail} {
4942 tgl 533 GIC 78 : ECHO;
534 6 : }
6243 bruce 535 ECB : <xe>{xeescape} {
6989 tgl 536 CBC 6 : ECHO;
537 715 : }
538 : <xe>{xeoctesc} {
539 715 : ECHO;
540 12 : }
541 : <xe>{xehexesc} {
6520 bruce 542 12 : ECHO;
543 6 : }
544 : <xe>. {
6984 tgl 545 6 : /* This is only needed for \ just before EOF */
6984 tgl 546 LBC 0 : ECHO;
6984 tgl 547 UIC 0 : }
6984 tgl 548 ECB :
6984 tgl 549 LBC 0 : {dolqdelim} {
    /*
     * Opening dollar-quote delimiter: keep a private copy of it in
     * dolqstart so the matching closing delimiter can be recognized,
     * then scan the body in the <xdolq> state.
     */
6984 tgl 550 GIC 3220 : cur_state->dolqstart = pg_strdup(yytext);
6984 tgl 551 CBC 3220 : BEGIN(xdolq);
6984 tgl 552 GIC 3220 : ECHO;
6984 tgl 553 GIC 3220 : }
6527 tgl 554 ECB : {dolqfailed} {
6527 tgl 555 CBC 3220 : /* throw back all but the initial "$" */
6527 tgl 556 UIC 0 : yyless(1);
6527 tgl 557 LBC 0 : ECHO;
558 0 : }
<xdolq>{dolqdelim} {
    /*
     * A delimiter-shaped token inside a dollar-quoted string.  Only an
     * exact match with the saved opening delimiter ends the string; the
     * saved copy is then released and the pointer cleared.
     */
6984 560 0 : if (strcmp(yytext, cur_state->dolqstart) == 0)
6984 tgl 561 GBC 3406 : {
6984 tgl 562 EUB : free(cur_state->dolqstart);
6984 tgl 563 GIC 3220 : cur_state->dolqstart = NULL;
6984 tgl 564 GBC 3220 : BEGIN(INITIAL);
6984 tgl 565 CBC 3220 : }
6984 tgl 566 ECB : else
567 : {
568 : /*
569 : * When we fail to match $...$ to dolqstart, transfer
570 : * the $... part to the output, but put back the final
6984 tgl 571 EUB : * $ for rescanning. Consider $delim$...$junk$delim$
572 : */
2577 573 : yyless(yyleng - 1);
6984 tgl 574 GIC 186 : }
6984 tgl 575 EUB : ECHO;
6984 tgl 576 CBC 3406 : }
577 : <xdolq>{dolqinside} {
578 3406 : ECHO;
579 16945 : }
6527 tgl 580 ECB : <xdolq>{dolqfailed} {
6527 tgl 581 GIC 16945 : ECHO;
582 541 : }
583 : <xdolq>. {
6984 584 541 : /* This is only needed for $ inside the quoted text */
585 1132 : ECHO;
586 1132 : }
587 :
6989 588 1132 : {xdstart} {
6989 tgl 589 CBC 4289 : BEGIN(xd);
6989 tgl 590 GIC 4289 : ECHO;
6989 tgl 591 CBC 4289 : }
592 : {xuistart} {
5275 peter_e 593 4289 : BEGIN(xui);
594 18 : ECHO;
5275 peter_e 595 GIC 18 : }
6989 tgl 596 ECB : <xd>{xdstop} {
6989 tgl 597 CBC 18 : BEGIN(INITIAL);
6989 tgl 598 GIC 4289 : ECHO;
6989 tgl 599 CBC 4289 : }
1182 tgl 600 ECB : <xui>{dquote} {
5275 peter_e 601 CBC 4289 : BEGIN(INITIAL);
6989 tgl 602 GIC 18 : ECHO;
6989 tgl 603 CBC 18 : }
5275 peter_e 604 ECB : <xd,xui>{xddouble} {
5275 peter_e 605 CBC 18 : ECHO;
606 33 : }
607 : <xd,xui>{xdinside} {
608 33 : ECHO;
609 4340 : }
5275 peter_e 610 ECB :
5275 peter_e 611 GIC 4340 : {xufailed} {
5275 peter_e 612 LBC 0 : /* throw back all but the initial u/U */
5275 peter_e 613 ECB : yyless(1);
6989 tgl 614 LBC 0 : ECHO;
6989 tgl 615 UIC 0 : }
6989 tgl 616 ECB :
6989 tgl 617 LBC 0 : {typecast} {
6989 tgl 618 CBC 21950 : ECHO;
6989 tgl 619 GIC 21950 : }
6989 tgl 620 ECB :
4942 tgl 621 CBC 21950 : {dot_dot} {
4942 tgl 622 UIC 0 : ECHO;
4942 tgl 623 LBC 0 : }
4942 tgl 624 ECB :
4942 tgl 625 UIC 0 : {colon_equals} {
4942 tgl 626 CBC 1512 : ECHO;
4942 tgl 627 GBC 1512 : }
628 :
2952 629 1512 : {equals_greater} {
630 316 : ECHO;
2952 tgl 631 GIC 316 : }
2952 tgl 632 EUB :
2951 tgl 633 CBC 316 : {less_equals} {
634 1117 : ECHO;
2951 tgl 635 GIC 1117 : }
2951 tgl 636 ECB :
2951 tgl 637 GBC 1117 : {greater_equals} {
638 959 : ECHO;
2951 tgl 639 GIC 959 : }
2951 tgl 640 EUB :
2951 tgl 641 CBC 959 : {less_greater} {
642 535 : ECHO;
2951 tgl 643 GIC 535 : }
2951 tgl 644 ECB :
2951 tgl 645 CBC 535 : {not_equals} {
646 1037 : ECHO;
2951 tgl 647 GIC 1037 : }
2951 tgl 648 ECB :
6989 tgl 649 CBC 1037 : /*
6989 tgl 650 ECB : * These rules are specific to psql --- they implement parenthesis
651 : * counting and detection of command-ending semicolon. These must
652 : * appear before the {self} rule so that they take precedence over it.
653 : */
654 :
6989 tgl 655 GIC 144470 : "(" {
    /* Count open parentheses; the semicolon rule consults paren_depth */
6989 tgl 656 ECB : cur_state->paren_depth++;
6989 tgl 657 CBC 144470 : ECHO;
658 144470 : }
659 :
660 144470 : ")" {
    /* An unmatched right parenthesis is echoed without letting the depth go negative */
661 144463 : if (cur_state->paren_depth > 0)
662 144463 : cur_state->paren_depth--;
6989 tgl 663 GIC 144463 : ECHO;
6989 tgl 664 CBC 144463 : }
665 :
6989 tgl 666 GIC 144463 : ";" {
    /*
     * The semicolon is always echoed.  It ends the command, returning
     * LEXRES_SEMI, only when it is outside all parentheses and outside
     * every tracked BEGIN ... END block.  Before returning, the current
     * flex state is saved in start_state and the identifier counter is
     * reset for the next statement.
     */
667 142199 : ECHO;
732 peter 668 142199 : if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
6989 tgl 669 142199 : {
6989 tgl 670 ECB : /* Terminate lexing temporarily */
671 : cur_state->start_state = YY_START;
732 peter 672 CBC 142127 : cur_state->identifier_count = 0;
6989 tgl 673 142127 : return LEXRES_SEMI;
6989 tgl 674 GIC 142127 : }
6989 tgl 675 ECB : }
676 :
6989 tgl 677 CBC 72 : /*
6989 tgl 678 ECB : * psql-specific rules to handle backslash commands and variable
679 : * substitution. We want these before {self}, also.
680 : */
681 :
1476 alvherre 682 CBC 377 : "\\"[;:] {
1476 alvherre 683 ECB : /* Force a semi-colon or colon into the query buffer */
2577 tgl 684 : psqlscan_emit(cur_state, yytext + 1, 1);
    /* Only the character after the backslash was emitted; an escaped semicolon also resets the identifier counter */
732 peter 685 GIC 377 : if (yytext[1] == ';')
686 377 : cur_state->identifier_count = 0;
6989 tgl 687 CBC 377 : }
6989 tgl 688 ECB :
6989 tgl 689 CBC 377 : "\\" {
6989 tgl 690 GIC 6648 : /* Terminate lexing temporarily */
691 : cur_state->start_state = YY_START;
6989 tgl 692 CBC 6648 : return LEXRES_BACKSLASH;
6989 tgl 693 GIC 6648 : }
694 :
695 : :{variable_char}+ {
696 1366 : /* Possible psql variable substitution */
    /*
     * The variable name is the matched text minus its leading colon.
     * Both varname and any value handed back by the get_variable
     * callback are released with free() before this action finishes.
     */
2577 tgl 697 ECB : char *varname;
698 : char *value;
699 :
700 : varname = psqlscan_extract_substring(cur_state,
2577 tgl 701 CBC 1366 : yytext + 1,
702 1366 : yyleng - 1);
    /* With no get_variable callback installed, the token is treated as not being a variable */
2578 tgl 703 GIC 1366 : if (cur_state->callbacks->get_variable)
2578 tgl 704 CBC 1366 : value = cur_state->callbacks->get_variable(varname,
2199 705 770 : PQUOTE_PLAIN,
706 : cur_state->cb_passthrough);
2578 tgl 707 ECB : else
708 : value = NULL;
6989 tgl 709 GIC 596 :
710 : if (value)
6989 tgl 711 CBC 1366 : {
712 : /* It is a variable, check for recursion */
713 : if (psqlscan_var_is_current_source(cur_state, varname))
4722 tgl 714 GIC 566 : {
715 : /* Recursive expansion --- don't go there */
1469 peter 716 ECB : pg_log_warning("skipping recursive expansion of variable \"%s\"",
2578 tgl 717 LBC 0 : varname);
4722 tgl 718 ECB : /* Instead copy the string as is */
719 : ECHO;
4722 tgl 720 LBC 0 : }
721 : else
722 : {
723 : /* OK, perform substitution */
2577 tgl 724 ECB : psqlscan_push_new_buffer(cur_state, value, varname);
4722 tgl 725 GIC 566 : /* yy_scan_string already made buffer active */
4722 tgl 726 ECB : }
727 : free(value);
6989 tgl 728 GIC 566 : }
6989 tgl 729 ECB : else
730 : {
731 : /*
2577 tgl 732 EUB : * if the variable doesn't exist we'll copy the string
733 : * as is
734 : */
6989 735 : ECHO;
6989 tgl 736 GIC 800 : }
737 :
738 : free(varname);
739 1366 : }
6989 tgl 740 ECB :
4244 tgl 741 GIC 1366 : :'{variable_char}+' {
2199 742 347 : psqlscan_escape_variable(cur_state, yytext, yyleng,
2199 tgl 743 CBC 347 : PQUOTE_SQL_LITERAL);
744 : }
745 :
4244 tgl 746 GIC 347 : :\"{variable_char}+\" {
2199 747 6 : psqlscan_escape_variable(cur_state, yytext, yyleng,
748 6 : PQUOTE_SQL_IDENT);
749 : }
750 :
2026 andrew 751 CBC 6 : :\{\?{variable_char}+\} {
2026 andrew 752 GIC 6 : psqlscan_test_variable(cur_state, yytext, yyleng);
753 6 : }
2026 andrew 754 ECB :
4245 tgl 755 GIC 6 : /*
4245 tgl 756 ECB : * These rules just avoid the need for scanner backup if one of the
2026 andrew 757 : * three rules above fails to match completely.
4245 tgl 758 : */
759 :
4244 tgl 760 UIC 0 : :'{variable_char}* {
4245 tgl 761 ECB : /* Throw back everything but the colon */
762 : yyless(1);
4245 tgl 763 LBC 0 : ECHO;
4245 tgl 764 UIC 0 : }
765 :
4244 tgl 766 LBC 0 : :\"{variable_char}* {
4245 767 0 : /* Throw back everything but the colon */
4245 tgl 768 ECB : yyless(1);
4245 tgl 769 UIC 0 : ECHO;
4245 tgl 770 LBC 0 : }
771 :
2026 andrew 772 UIC 0 : :\{\?{variable_char}* {
773 0 : /* Throw back everything but the colon */
774 : yyless(1);
2026 andrew 775 UBC 0 : ECHO;
2026 andrew 776 UIC 0 : }
777 : :\{ {
2026 andrew 778 UBC 0 : /* Throw back everything but the colon */
779 0 : yyless(1);
2026 andrew 780 UIC 0 : ECHO;
2026 andrew 781 UBC 0 : }
2026 andrew 782 EUB :
6989 tgl 783 UIC 0 : /*
6989 tgl 784 EUB : * Back to backend-compatible rules.
785 : */
786 :
6989 tgl 787 GBC 255148 : {self} {
6989 tgl 788 EUB : ECHO;
6989 tgl 789 GIC 255148 : }
6989 tgl 790 EUB :
6989 tgl 791 GBC 255148 : {operator} {
    /*
     * nchars ends up as the number of leading characters kept as the
     * operator token; anything beyond it is pushed back with yyless
     * and rescanned.
     */
6989 tgl 792 GIC 9094 : /*
6989 tgl 793 EUB : * Check for embedded slash-star or dash-dash; those
794 : * are comment starts, so operator must stop there.
795 : * Note that slash-star or dash-dash at the first
796 : * character will match a prior rule, not this one.
797 : */
2577 798 : int nchars = yyleng;
2577 tgl 799 GIC 9094 : char *slashstar = strstr(yytext, "/*");
800 9094 : char *dashdash = strstr(yytext, "--");
6989 801 9094 :
6989 tgl 802 ECB : if (slashstar && dashdash)
6989 tgl 803 GIC 9094 : {
6989 tgl 804 ECB : /* if both appear, take the first one */
805 : if (slashstar > dashdash)
6989 tgl 806 LBC 0 : slashstar = dashdash;
807 0 : }
808 : else if (!slashstar)
6989 tgl 809 GIC 9094 : slashstar = dashdash;
    /* slashstar now marks the earliest comment start of either kind, if there is one */
810 9064 : if (slashstar)
811 9094 : nchars = slashstar - yytext;
812 36 :
813 : /*
6989 tgl 814 ECB : * For SQL compatibility, '+' and '-' cannot be the
815 : * last char of a multi-char operator unless the operator
816 : * contains chars that are not in SQL operators.
817 : * The idea is to lex '=-' as two operators, but not
818 : * to forbid operator names like '?-' that could not be
819 : * sequences of SQL operators.
820 : */
1690 rhodiumtoad 821 EUB : if (nchars > 1 &&
1690 rhodiumtoad 822 GBC 9094 : (yytext[nchars - 1] == '+' ||
1690 rhodiumtoad 823 GIC 8253 : yytext[nchars - 1] == '-'))
6989 tgl 824 CBC 8250 : {
2577 tgl 825 ECB : int ic;
6989 826 :
    /* Walk the rest of the token right to left; the loop ends with ic below zero when no listed character is found */
2577 827 : for (ic = nchars - 2; ic >= 0; ic--)
6989 tgl 828 GIC 273 : {
829 : char c = yytext[ic];
1690 rhodiumtoad 830 234 : if (c == '~' || c == '!' || c == '@' ||
831 234 : c == '#' || c == '^' || c == '&' ||
832 192 : c == '|' || c == '`' || c == '?' ||
833 75 : c == '%')
834 : break;
835 : }
836 : if (ic < 0)
1690 rhodiumtoad 837 CBC 207 : {
1690 rhodiumtoad 838 ECB : /*
839 : * didn't find a qualifying character, so remove
840 : * all trailing [+-]
841 : */
842 : do {
843 : nchars--;
1690 rhodiumtoad 844 GIC 39 : } while (nchars > 1 &&
1690 rhodiumtoad 845 CBC 39 : (yytext[nchars - 1] == '+' ||
846 18 : yytext[nchars - 1] == '-'));
847 18 : }
6989 tgl 848 ECB : }
849 :
850 : if (nchars < yyleng)
6989 tgl 851 GIC 9094 : {
6989 tgl 852 ECB : /* Strip the unwanted chars from the token */
853 : yyless(nchars);
6989 tgl 854 GIC 75 : }
855 : ECHO;
856 9094 : }
857 :
858 9094 : {param} {
6989 tgl 859 CBC 227 : ECHO;
860 227 : }
417 peter 861 ECB : {param_junk} {
417 peter 862 CBC 227 : ECHO;
417 peter 863 GIC 3 : }
864 :
116 peter 865 GNC 3 : {decinteger} {
116 peter 866 CBC 84962 : ECHO;
116 peter 867 GIC 84962 : }
868 : {hexinteger} {
116 peter 869 CBC 84962 : ECHO;
116 peter 870 GIC 30 : }
871 : {octinteger} {
116 peter 872 GNC 30 : ECHO;
873 30 : }
874 : {bininteger} {
875 30 : ECHO;
876 30 : }
877 : {hexfail} {
6989 tgl 878 30 : ECHO;
879 3 : }
880 : {octfail} {
881 3 : ECHO;
882 3 : }
883 : {binfail} {
116 peter 884 3 : ECHO;
885 3 : }
886 : {numeric} {
887 3 : ECHO;
888 3553 : }
889 : {numericfail} {
4896 tgl 890 GIC 3553 : /* throw back the .., and treat as integer */
2577 tgl 891 LBC 0 : yyless(yyleng - 2);
4896 892 0 : ECHO;
893 0 : }
894 : {real} {
6989 895 0 : ECHO;
6989 tgl 896 CBC 159 : }
897 : {realfail} {
6527 898 159 : ECHO;
899 3 : }
900 : {decinteger_junk} {
116 peter 901 GIC 3 : ECHO;
116 peter 902 CBC 21 : }
903 : {hexinteger_junk} {
116 peter 904 GNC 21 : ECHO;
905 6 : }
906 : {octinteger_junk} {
907 6 : ECHO;
908 3 : }
909 : {bininteger_junk} {
417 910 3 : ECHO;
911 3 : }
912 : {numeric_junk} {
417 peter 913 GIC 3 : ECHO;
417 peter 914 CBC 21 : }
417 peter 915 ECB : {real_junk} {
6527 tgl 916 GIC 21 : ECHO;
6527 tgl 917 CBC 3 : }
6989 tgl 918 ECB :
6989 tgl 919 GIC 3 :
6989 tgl 920 CBC 1043759 : {identifier} {
723 peter 921 ECB : /*
922 : * We need to track if we are inside a BEGIN .. END block
923 : * in a function definition, so that semicolons contained
924 : * therein don't terminate the whole statement. Short of
925 : * writing a full parser here, the following heuristic
926 : * should work. First, we track whether the beginning of
927 : * the statement matches CREATE [OR REPLACE]
928 : * {FUNCTION|PROCEDURE}
929 : */
930 :
    /* First identifier of a statement: start from an all-zero identifiers array */
931 : if (cur_state->identifier_count == 0)
723 peter 932 CBC 1043759 : memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
723 peter 933 GBC 146435 :
    /*
     * For the five keywords below only, record the lower-cased first
     * letter in the slot numbered by this identifier's position in the
     * statement, as long as that position fits in the array.  Slots of
     * all other identifiers stay zero.
     */
723 peter 934 EUB : if (pg_strcasecmp(yytext, "create") == 0 ||
723 peter 935 GBC 2061423 : pg_strcasecmp(yytext, "function") == 0 ||
723 peter 936 GIC 2030476 : pg_strcasecmp(yytext, "procedure") == 0 ||
723 peter 937 GBC 2024046 : pg_strcasecmp(yytext, "or") == 0 ||
723 peter 938 CBC 2019705 : pg_strcasecmp(yytext, "replace") == 0)
732 peter 939 GIC 1008471 : {
723 peter 940 ECB : if (cur_state->identifier_count < sizeof(cur_state->identifiers))
723 peter 941 CBC 36281 : cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
732 peter 942 GIC 32715 : }
723 peter 943 ECB :
944 : cur_state->identifier_count++;
723 peter 945 GIC 1043759 :
    /*
     * The recorded letters must read c,f or c,p or c,o,r,f or c,o,r,p,
     * and we must be outside parentheses, before BEGIN, CASE and END
     * are allowed to adjust begin_depth.
     */
723 peter 946 ECB : if (cur_state->identifiers[0] == 'c' &&
723 peter 947 CBC 1043759 : (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
723 peter 948 GIC 258385 : (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
723 peter 949 CBC 233929 : (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
950 10368 : cur_state->paren_depth == 0)
732 peter 951 GIC 31020 : {
723 peter 952 ECB : if (pg_strcasecmp(yytext, "begin") == 0)
723 peter 953 CBC 25061 : cur_state->begin_depth++;
723 peter 954 GIC 36 : else if (pg_strcasecmp(yytext, "case") == 0)
723 peter 955 CBC 25025 : {
723 peter 956 ECB : /*
957 : * CASE also ends with END. We only need to track
958 : * this if we are already inside a BEGIN.
959 : */
960 : if (cur_state->begin_depth >= 1)
723 peter 961 CBC 3 : cur_state->begin_depth++;
962 3 : }
963 : else if (pg_strcasecmp(yytext, "end") == 0)
723 peter 964 GIC 25022 : {
    /* Never let the depth drop below zero on a stray END */
965 : if (cur_state->begin_depth > 0)
966 39 : cur_state->begin_depth--;
967 39 : }
968 : }
969 :
970 : ECHO;
6989 tgl 971 1043759 : }
972 :
973 1043759 : {other} {
6989 tgl 974 LBC 0 : ECHO;
975 0 : }
976 :
977 0 : <<EOF>> {
2577 tgl 978 CBC 276973 : if (cur_state->buffer_stack == NULL)
2578 979 276973 : {
2578 tgl 980 ECB : cur_state->start_state = YY_START;
2577 tgl 981 CBC 276407 : return LEXRES_EOL; /* end of input reached */
2578 tgl 982 GIC 276407 : }
6989 tgl 983 ECB :
984 : /*
985 : * We were expanding a variable, so pop the inclusion
986 : * stack and keep lexing
987 : */
988 : psqlscan_pop_buffer_stack(cur_state);
2577 tgl 989 CBC 566 : psqlscan_select_top_buffer(cur_state);
6989 990 566 : }
6989 tgl 991 ECB :
6989 tgl 992 CBC 566 : %%
6989 tgl 993 LBC 0 :
994 : /* LCOV_EXCL_STOP */
2068 peter_e 995 ECB :
6989 tgl 996 : /*
997 : * Create a lexer working state struct.
998 : *
999 : * callbacks is a struct of function pointers that encapsulate some
1000 : * behavior we need from the surrounding program. This struct must
1001 : * remain valid for the lifespan of the PsqlScanState.
1002 : */
1003 : PsqlScanState
2578 1004 : psql_scan_create(const PsqlScanCallbacks *callbacks)
6989 tgl 1005 GIC 6233 : {
6989 tgl 1006 ECB : PsqlScanState state;
1007 :
3841 1008 : state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
6989 tgl 1009 CBC 6233 :
1010 : state->callbacks = callbacks;
2578 tgl 1011 GIC 6233 :
1012 : yylex_init(&state->scanner);
2577 tgl 1013 CBC 6233 :
1014 : yyset_extra(state, state->scanner);
2578 1015 6233 :
6989 tgl 1016 EUB : psql_scan_reset(state);
6989 tgl 1017 GBC 6233 :
1018 : return state;
1019 6233 : }
6989 tgl 1020 ECB :
1021 : /*
1022 : * Destroy a lexer working state struct, releasing all resources.
1023 : */
1024 : void
1025 : psql_scan_destroy(PsqlScanState state)
6989 tgl 1026 GIC 6186 : {
1027 : psql_scan_finish(state);
1028 6186 :
1029 : psql_scan_reset(state);
6984 1030 6186 :
2578 tgl 1031 ECB : yylex_destroy(state->scanner);
2578 tgl 1032 CBC 6186 :
1033 : free(state);
6989 1034 6186 : }
6989 tgl 1035 GBC 6186 :
1036 : /*
1037 : * Set the callback passthrough pointer for the lexer.
1038 : *
1039 : * This could have been integrated into psql_scan_create, but keeping it
1040 : * separate allows the application to change the pointer later, which might
1041 : * be useful.
1042 : */
1043 : void
1044 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
2218 tgl 1045 GIC 5971 : {
1046 : state->cb_passthrough = passthrough;
2218 tgl 1047 CBC 5971 : }
2218 tgl 1048 GIC 5971 :
1049 : /*
1050 : * Set up to perform lexing of the given input line.
6989 tgl 1051 ECB : *
1052 : * The text at *line, extending for line_len bytes, will be scanned by
1053 : * subsequent calls to the psql_scan routines. psql_scan_finish should
1054 : * be called when scanning is complete. Note that the lexer retains
1055 : * a pointer to the storage at *line --- this string must not be altered
1056 : * or freed until after psql_scan_finish is called.
2578 1057 : *
1058 : * encoding is the libpq identifier for the character encoding in use,
1059 : * and std_strings says whether standard_conforming_strings is on.
1060 : */
6989 1061 : void
1062 : psql_scan_setup(PsqlScanState state,
2578 tgl 1063 GIC 276494 : const char *line, int line_len,
1064 : int encoding, bool std_strings)
1065 : {
1066 : /* Mustn't be scanning already */
1067 : Assert(state->scanbufhandle == NULL);
3768 andrew 1068 CBC 276494 : Assert(state->buffer_stack == NULL);
6989 tgl 1069 GIC 276494 :
6989 tgl 1070 ECB : /* Do we need to hack the character set encoding? */
1071 : state->encoding = encoding;
2578 tgl 1072 CBC 276494 : state->safe_encoding = pg_valid_server_encoding_id(encoding);
2578 tgl 1073 GIC 276494 :
2578 tgl 1074 ECB : /* Save standard-strings flag as well */
1075 : state->std_strings = std_strings;
6989 tgl 1076 CBC 276494 :
6989 tgl 1077 ECB : /* Set up flex input buffer with appropriate translation and padding */
1078 : state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
2577 tgl 1079 GIC 276494 : &state->scanbuf);
1080 : state->scanline = line;
6989 1081 276494 :
1082 : /* Set lookaside data in case we have to map unsafe encoding */
1083 : state->curline = state->scanbuf;
1084 276494 : state->refline = state->scanline;
1085 276494 : }
1086 276494 :
6989 tgl 1087 ECB : /*
1088 : * Do lexical analysis of SQL command text.
1089 : *
1090 : * The text previously passed to psql_scan_setup is scanned, and appended
1091 : * (possibly with transformation) to query_buf.
1092 : *
1093 : * The return value indicates the condition that stopped scanning:
1094 : *
1095 : * PSCAN_SEMICOLON: found a command-ending semicolon. (The semicolon is
1096 : * transferred to query_buf.) The command accumulated in query_buf should
1097 : * be executed, then clear query_buf and call again to scan the remainder
1098 : * of the line.
1099 : *
1100 : * PSCAN_BACKSLASH: found a backslash that starts a special command.
1101 : * Any previous data on the line has been transferred to query_buf.
1102 : * The caller will typically next apply a separate flex lexer to scan
1103 : * the special command.
1104 : *
1105 : * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
1106 : * incomplete SQL command. *prompt is set to the appropriate prompt type.
1107 : *
1108 : * PSCAN_EOL: the end of the line was reached, and there is no lexical
1109 : * reason to consider the command incomplete. The caller may or may not
1110 : * choose to send it. *prompt is set to the appropriate prompt type if
1111 : * the caller chooses to collect more input.
1112 : *
1113 : * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
1114 : * be called next, then the cycle may be repeated with a fresh input line.
1115 : *
1116 : * In all cases, *prompt is set to an appropriate prompt type code for the
1117 : * next line-input operation.
1118 : */
1119 : PsqlScanResult
1120 : psql_scan(PsqlScanState state,
6989 tgl 1121 CBC 425182 : PQExpBuffer query_buf,
1122 : promptStatus_t *prompt)
6989 tgl 1123 ECB : {
1124 : PsqlScanResult result;
1125 : int lexresult;
1126 :
1127 : /* Must be scanning already */
3768 andrew 1128 : Assert(state->scanbufhandle != NULL);
6989 tgl 1129 GIC 425182 :
1130 : /* Set current output target */
1131 : state->output_buf = query_buf;
1132 425182 :
1133 : /* Set input source */
1134 : if (state->buffer_stack != NULL)
2578 1135 425182 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
6989 1136 45 : else
1137 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
1138 425137 :
1139 : /* And lex. */
1140 : lexresult = yylex(NULL, state->scanner);
1141 425182 :
1142 : /*
1143 : * Check termination state and return appropriate result info.
1144 : */
1145 : switch (lexresult)
1146 425182 : {
1147 : case LEXRES_EOL: /* end of input */
1148 276407 : switch (state->start_state)
1149 276407 : {
1150 : case INITIAL:
1182 1151 254483 : case xqs: /* we treat this like INITIAL */
1152 : if (state->paren_depth > 0)
6989 1153 254483 : {
1154 : result = PSCAN_INCOMPLETE;
1155 19146 : *prompt = PROMPT_PAREN;
1156 19146 : }
1157 : else if (state->begin_depth > 0)
732 peter 1158 235337 : {
1159 : result = PSCAN_INCOMPLETE;
1160 78 : *prompt = PROMPT_CONTINUE;
1161 78 : }
1162 : else if (query_buf->len > 0)
6989 tgl 1163 CBC 235259 : {
1164 : result = PSCAN_EOL;
6989 tgl 1165 GIC 44435 : *prompt = PROMPT_CONTINUE;
1166 44435 : }
1167 : else
1168 : {
1169 : /* never bother to send an empty buffer */
1170 : result = PSCAN_INCOMPLETE;
6989 tgl 1171 CBC 190824 : *prompt = PROMPT_READY;
6989 tgl 1172 GIC 190824 : }
1173 : break;
6989 tgl 1174 CBC 254483 : case xb:
6989 tgl 1175 UIC 0 : result = PSCAN_INCOMPLETE;
1176 0 : *prompt = PROMPT_SINGLEQUOTE;
6989 tgl 1177 LBC 0 : break;
1178 0 : case xc:
6989 tgl 1179 GIC 367 : result = PSCAN_INCOMPLETE;
6989 tgl 1180 CBC 367 : *prompt = PROMPT_COMMENT;
6989 tgl 1181 GIC 367 : break;
1182 367 : case xd:
6989 tgl 1183 CBC 9 : result = PSCAN_INCOMPLETE;
6989 tgl 1184 GIC 9 : *prompt = PROMPT_DOUBLEQUOTE;
1185 9 : break;
1186 9 : case xh:
6989 tgl 1187 UIC 0 : result = PSCAN_INCOMPLETE;
6989 tgl 1188 LBC 0 : *prompt = PROMPT_SINGLEQUOTE;
6989 tgl 1189 UIC 0 : break;
4548 tgl 1190 LBC 0 : case xe:
6989 tgl 1191 CBC 301 : result = PSCAN_INCOMPLETE;
6989 tgl 1192 GIC 301 : *prompt = PROMPT_SINGLEQUOTE;
6989 tgl 1193 CBC 301 : break;
4548 tgl 1194 GIC 301 : case xq:
6243 bruce 1195 CBC 4661 : result = PSCAN_INCOMPLETE;
6243 bruce 1196 GIC 4661 : *prompt = PROMPT_SINGLEQUOTE;
6243 bruce 1197 CBC 4661 : break;
6984 tgl 1198 4661 : case xdolq:
6984 tgl 1199 GIC 16586 : result = PSCAN_INCOMPLETE;
6984 tgl 1200 CBC 16586 : *prompt = PROMPT_DOLLARQUOTE;
6984 tgl 1201 GIC 16586 : break;
4548 tgl 1202 CBC 16586 : case xui:
4548 tgl 1203 LBC 0 : result = PSCAN_INCOMPLETE;
4548 tgl 1204 UIC 0 : *prompt = PROMPT_DOUBLEQUOTE;
4548 tgl 1205 LBC 0 : break;
4548 tgl 1206 UIC 0 : case xus:
4548 tgl 1207 LBC 0 : result = PSCAN_INCOMPLETE;
1208 0 : *prompt = PROMPT_SINGLEQUOTE;
4548 tgl 1209 UIC 0 : break;
6989 1210 0 : default:
1211 0 : /* can't get here */
1212 : fprintf(stderr, "invalid YY_START\n");
6989 tgl 1213 LBC 0 : exit(1);
1214 0 : }
1215 : break;
6989 tgl 1216 CBC 276407 : case LEXRES_SEMI: /* semicolon */
6989 tgl 1217 GBC 142127 : result = PSCAN_SEMICOLON;
1218 142127 : *prompt = PROMPT_READY;
1219 142127 : break;
1220 142127 : case LEXRES_BACKSLASH: /* backslash */
6989 tgl 1221 CBC 6648 : result = PSCAN_BACKSLASH;
1222 6648 : *prompt = PROMPT_READY;
1223 6648 : break;
1224 6648 : default:
6989 tgl 1225 LBC 0 : /* can't get here */
6989 tgl 1226 ECB : fprintf(stderr, "invalid yylex result\n");
6989 tgl 1227 LBC 0 : exit(1);
1228 0 : }
6989 tgl 1229 EUB :
1230 : return result;
6989 tgl 1231 GBC 425182 : }
6989 tgl 1232 EUB :
6989 tgl 1233 ECB : /*
1234 : * Clean up after scanning a string. This flushes any unread input and
1235 : * releases resources (but not the PsqlScanState itself). Note however
1236 : * that this does not reset the lexer scan state; that can be done by
1237 : * psql_scan_reset(), which is an orthogonal operation.
1238 : *
1239 : * It is legal to call this when not scanning anything (makes it easier
1240 : * to deal with error recovery).
1241 : */
1242 : void
1243 : psql_scan_finish(PsqlScanState state)
6989 tgl 1244 CBC 282631 : {
6989 tgl 1245 EUB : /* Drop any incomplete variable expansions. */
1246 : while (state->buffer_stack != NULL)
2577 tgl 1247 GBC 282631 : psqlscan_pop_buffer_stack(state);
6989 tgl 1248 UBC 0 :
6989 tgl 1249 EUB : /* Done with the outer scan buffer, too */
1250 : if (state->scanbufhandle)
2578 tgl 1251 GBC 282631 : yy_delete_buffer(state->scanbufhandle, state->scanner);
6989 1252 276447 : state->scanbufhandle = NULL;
1253 282631 : if (state->scanbuf)
6989 tgl 1254 GIC 282631 : free(state->scanbuf);
6989 tgl 1255 GBC 276447 : state->scanbuf = NULL;
1256 282631 : }
6989 tgl 1257 GIC 282631 :
6989 tgl 1258 ECB : /*
1259 : * Reset lexer scanning state to start conditions. This is appropriate
1260 : * for executing \r psql commands (or any other time that we discard the
1261 : * prior contents of query_buf). It is not, however, necessary to do this
1262 : * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
1263 : * PSCAN_EOL scan result, because the scan state must be INITIAL when those
1264 : * conditions are returned.
1265 : *
1266 : * Note that this is unrelated to flushing unread input; that task is
6989 tgl 1267 EUB : * done by psql_scan_finish().
1268 : */
1269 : void
1270 : psql_scan_reset(PsqlScanState state)
6989 tgl 1271 GIC 12953 : {
1272 : state->start_state = INITIAL;
6989 tgl 1273 CBC 12953 : state->paren_depth = 0;
6989 tgl 1274 GIC 12953 : state->xcdepth = 0; /* not really necessary */
6984 1275 12953 : if (state->dolqstart)
1276 12953 : free(state->dolqstart);
6984 tgl 1277 UIC 0 : state->dolqstart = NULL;
732 peter 1278 GIC 12953 : state->identifier_count = 0;
1279 12953 : state->begin_depth = 0;
6989 tgl 1280 12953 : }
1281 12953 :
1282 : /*
1283 : * Reselect this lexer (psqlscan.l) after using another one.
1284 : *
1285 : * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
2577 tgl 1286 ECB : * state, because we'd never switch to another lexer in a different state.
1287 : * However, we don't want to reset e.g. paren_depth, so this can't be
1288 : * the same as psql_scan_reset().
6989 1289 : *
2577 tgl 1290 EUB : * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
1291 : * must be a superset of this.
1292 : *
2577 tgl 1293 ECB : * Note: it seems likely that other lexers could just assign INITIAL for
1294 : * themselves, since that probably has the value zero in every flex-generated
1295 : * lexer. But let's not assume that.
6989 1296 : */
1297 : void
2577 1298 : psql_scan_reselect_sql_lexer(PsqlScanState state)
6989 tgl 1299 CBC 29956 : {
1300 : state->start_state = INITIAL;
6989 tgl 1301 GIC 29956 : }
1302 29956 :
1303 : /*
1304 : * Return true if lexer is currently in an "inside quotes" state.
1305 : *
1306 : * This is pretty grotty but is needed to preserve the old behavior
1307 : * that mainloop.c drops blank lines not inside quotes without even
1308 : * echoing them.
1309 : */
1310 : bool
1311 : psql_scan_in_quote(PsqlScanState state)
4244 1312 60029 : {
1182 tgl 1313 ECB : return state->start_state != INITIAL &&
1182 tgl 1314 GIC 60483 : state->start_state != xqs;
4244 tgl 1315 CBC 454 : }
6989 tgl 1316 ECB :
1317 : /*
1318 : * Push the given string onto the stack of stuff to scan.
6989 tgl 1319 EUB : *
6989 tgl 1320 ECB : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1321 : */
2577 1322 : void
1323 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
2577 tgl 1324 GIC 566 : const char *varname)
1325 : {
1326 : StackElem *stackelem;
1327 :
1328 : stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
4722 1329 566 :
1330 : /*
1331 : * In current usage, the passed varname points at the current flex input
1332 : * buffer; we must copy it before calling psqlscan_prepare_buffer()
1333 : * because that will change the buffer state.
1334 : */
1335 : stackelem->varname = varname ? pg_strdup(varname) : NULL;
1336 566 :
1337 : stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
2577 1338 566 : &stackelem->bufstring);
1339 : state->curline = stackelem->bufstring;
2578 1340 566 : if (state->safe_encoding)
6989 tgl 1341 CBC 566 : {
1342 : stackelem->origstring = NULL;
2578 1343 566 : state->refline = stackelem->bufstring;
6989 1344 566 : }
1345 : else
1346 : {
1347 : stackelem->origstring = pg_strdup(newstr);
2578 tgl 1348 UIC 0 : state->refline = stackelem->origstring;
6989 1349 0 : }
1350 : stackelem->next = state->buffer_stack;
2578 tgl 1351 GIC 566 : state->buffer_stack = stackelem;
6989 1352 566 : }
1353 566 :
4722 tgl 1354 ECB : /*
1355 : * Pop the topmost buffer stack item (there must be one!)
1356 : *
1357 : * NB: after this, the flex input state is unspecified; caller must
1358 : * switch to an appropriate buffer to continue lexing.
1359 : * See psqlscan_select_top_buffer().
1360 : */
1361 : void
1362 : psqlscan_pop_buffer_stack(PsqlScanState state)
4722 tgl 1363 GIC 566 : {
1364 : StackElem *stackelem = state->buffer_stack;
1365 566 :
4722 tgl 1366 ECB : state->buffer_stack = stackelem->next;
2578 tgl 1367 GIC 566 : yy_delete_buffer(stackelem->buf, state->scanner);
4722 1368 566 : free(stackelem->bufstring);
1369 566 : if (stackelem->origstring)
1370 566 : free(stackelem->origstring);
4722 tgl 1371 LBC 0 : if (stackelem->varname)
4722 tgl 1372 GIC 566 : free(stackelem->varname);
1373 566 : free(stackelem);
1374 566 : }
1375 566 :
1376 : /*
1377 : * Select the topmost surviving buffer as the active input.
2577 tgl 1378 ECB : */
1379 : void
1380 : psqlscan_select_top_buffer(PsqlScanState state)
2577 tgl 1381 GIC 566 : {
2577 tgl 1382 ECB : StackElem *stackelem = state->buffer_stack;
2577 tgl 1383 CBC 566 :
1384 : if (stackelem != NULL)
1385 566 : {
2577 tgl 1386 ECB : yy_switch_to_buffer(stackelem->buf, state->scanner);
2577 tgl 1387 UIC 0 : state->curline = stackelem->bufstring;
1388 0 : state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
1389 0 : }
2577 tgl 1390 EUB : else
1391 : {
1392 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
2577 tgl 1393 CBC 566 : state->curline = state->scanbuf;
1394 566 : state->refline = state->scanline;
1395 566 : }
1396 : }
2577 tgl 1397 GIC 566 :
1398 : /*
1399 : * Check if specified variable name is the source for any string
1400 : * currently being scanned
1401 : */
1402 : bool
1403 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
4722 1404 566 : {
4722 tgl 1405 ECB : StackElem *stackelem;
1406 :
1407 : for (stackelem = state->buffer_stack;
4722 tgl 1408 GIC 566 : stackelem != NULL;
4722 tgl 1409 CBC 566 : stackelem = stackelem->next)
4722 tgl 1410 LBC 0 : {
4722 tgl 1411 ECB : if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
4722 tgl 1412 LBC 0 : return true;
4722 tgl 1413 UBC 0 : }
4722 tgl 1414 ECB : return false;
4722 tgl 1415 CBC 566 : }
4722 tgl 1416 ECB :
6989 1417 : /*
1418 : * Set up a flex input buffer to scan the given data. We always make a
1419 : * copy of the data. If working in an unsafe encoding, the copy has
1420 : * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
1421 : *
1422 : * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
1423 : */
1424 : YY_BUFFER_STATE
2577 1425 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
2577 tgl 1426 GIC 277060 : char **txtcopy)
6989 tgl 1427 ECB : {
1428 : char *newtxt;
6989 tgl 1429 EUB :
1430 : /* Flex wants two \0 characters after the actual data */
1431 : newtxt = pg_malloc(len + 2);
6989 tgl 1432 GIC 277060 : *txtcopy = newtxt;
1433 277060 : newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
1434 277060 :
2578 tgl 1435 ECB : if (state->safe_encoding)
6989 tgl 1436 CBC 277060 : memcpy(newtxt, txt, len);
1437 277060 : else
1438 : {
6989 tgl 1439 ECB : /* Gotta do it the hard way */
1440 : int i = 0;
6989 tgl 1441 UIC 0 :
1442 : while (i < len)
1443 0 : {
1444 : int thislen = PQmblen(txt + i, state->encoding);
1445 0 :
6989 tgl 1446 ECB : /* first byte should always be okay... */
1447 : newtxt[i] = txt[i];
6989 tgl 1448 UIC 0 : i++;
3780 ishii 1449 0 : while (--thislen > 0 && i < len)
6989 tgl 1450 LBC 0 : newtxt[i++] = (char) 0xFF;
1451 0 : }
6989 tgl 1452 EUB : }
1453 :
2578 1454 : return yy_scan_buffer(newtxt, len + 2, state->scanner);
6989 tgl 1455 GBC 277060 : }
1456 :
6989 tgl 1457 ECB : /*
1458 : * psqlscan_emit() --- body for ECHO macro
1459 : *
1460 : * NB: this must be used for ALL and ONLY the text copied from the flex
1461 : * input data. If you pass it something that is not part of the yytext
1462 : * string, you are making a mistake. Internally generated text can be
1463 : * appended directly to state->output_buf.
1464 : */
1465 : void
1466 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
6989 tgl 1467 GIC 3281909 : {
2578 tgl 1468 ECB : PQExpBuffer output_buf = state->output_buf;
2578 tgl 1469 GIC 3281909 :
1470 : if (state->safe_encoding)
6989 1471 3281909 : appendBinaryPQExpBuffer(output_buf, txt, len);
1472 3281909 : else
1473 : {
6989 tgl 1474 ECB : /* Gotta do it the hard way */
2578 1475 : const char *reference = state->refline;
2577 tgl 1476 LBC 0 : int i;
1477 :
2578 tgl 1478 ECB : reference += (txt - state->curline);
6989 tgl 1479 LBC 0 :
1480 : for (i = 0; i < len; i++)
6989 tgl 1481 UIC 0 : {
1482 : char ch = txt[i];
6989 tgl 1483 UBC 0 :
1484 : if (ch == (char) 0xFF)
1485 0 : ch = reference[i];
6989 tgl 1486 UIC 0 : appendPQExpBufferChar(output_buf, ch);
6989 tgl 1487 UBC 0 : }
1488 : }
1489 : }
4942 tgl 1490 GBC 3281909 :
4244 tgl 1491 EUB : /*
2577 1492 : * psqlscan_extract_substring --- fetch value of (part of) the current token
4244 1493 : *
1494 : * This is like psqlscan_emit(), except that the data is returned as a
1495 : * malloc'd string rather than being pushed directly to state->output_buf.
1496 : */
2577 tgl 1497 ECB : char *
1498 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
4244 tgl 1499 GIC 2293 : {
1500 : char *result = (char *) pg_malloc(len + 1);
1501 2293 :
1502 : if (state->safe_encoding)
1503 2293 : memcpy(result, txt, len);
1504 2293 : else
1505 : {
1506 : /* Gotta do it the hard way */
1507 : const char *reference = state->refline;
2577 tgl 1508 UIC 0 : int i;
4244 tgl 1509 ECB :
1510 : reference += (txt - state->curline);
4244 tgl 1511 LBC 0 :
1512 : for (i = 0; i < len; i++)
1513 0 : {
2577 tgl 1514 ECB : char ch = txt[i];
4244 tgl 1515 UIC 0 :
1516 : if (ch == (char) 0xFF)
1517 0 : ch = reference[i];
4244 tgl 1518 UBC 0 : result[i] = ch;
4244 tgl 1519 UIC 0 : }
1520 : }
4244 tgl 1521 EUB : result[len] = '\0';
4244 tgl 1522 GIC 2293 : return result;
4244 tgl 1523 GBC 2293 : }
1524 :
4244 tgl 1525 EUB : /*
1526 : * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
1527 : *
1528 : * If the variable name is found, escape its value using the appropriate
1529 : * quoting method and emit the value to output_buf. (Since the result is
1530 : * surely quoted, there is never any reason to rescan it.) If we don't
1531 : * find the variable or escaping fails, emit the token as-is.
4244 tgl 1532 ECB : */
1533 : void
1534 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
2199 tgl 1535 GIC 388 : PsqlScanQuoteType quote)
1536 : {
1537 : char *varname;
1538 : char *value;
1539 :
1540 : /* Variable lookup. */
2577 tgl 1541 ECB : varname = psqlscan_extract_substring(state, txt + 2, len - 3);
2578 tgl 1542 GIC 388 : if (state->callbacks->get_variable)
2199 tgl 1543 CBC 388 : value = state->callbacks->get_variable(varname, quote,
2218 tgl 1544 GIC 388 : state->cb_passthrough);
2578 tgl 1545 ECB : else
1546 : value = NULL;
4244 tgl 1547 UIC 0 : free(varname);
4818 rhaas 1548 GIC 388 :
1549 : if (value)
4818 rhaas 1550 GBC 388 : {
1551 : /* Emit the suitably-escaped value */
1552 : appendPQExpBufferStr(state->output_buf, value);
2578 tgl 1553 366 : free(value);
2578 tgl 1554 GIC 366 : }
2578 tgl 1555 EUB : else
1556 : {
1557 : /* Emit original token as-is */
1558 : psqlscan_emit(state, txt, len);
4818 rhaas 1559 GBC 22 : }
4818 rhaas 1560 EUB : }
2026 andrew 1561 GBC 388 :
1562 : void
1563 : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
2026 andrew 1564 CBC 12 : {
2026 andrew 1565 ECB : char *varname;
1566 : char *value;
1567 :
1568 : varname = psqlscan_extract_substring(state, txt + 3, len - 4);
2026 andrew 1569 GIC 12 : if (state->callbacks->get_variable)
1570 12 : value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
1571 12 : state->cb_passthrough);
1572 : else
1573 : value = NULL;
2026 andrew 1574 UIC 0 : free(varname);
2026 andrew 1575 GIC 12 :
1576 : if (value != NULL)
2026 andrew 1577 CBC 12 : {
1578 : psqlscan_emit(state, "TRUE", 4);
2026 andrew 1579 GIC 6 : free(value);
1580 6 : }
1581 : else
1582 : {
1583 : psqlscan_emit(state, "FALSE", 5);
2026 andrew 1584 CBC 6 : }
2026 andrew 1585 ECB : }
|