Age Owner TLA Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * exprscan.l
5 : * lexical scanner for pgbench backslash commands
6 : *
7 : * This lexer supports two operating modes:
8 : *
9 : * In INITIAL state, just parse off whitespace-separated words (this mode
10 : * is basically equivalent to strtok(), which is what we used to use).
11 : *
12 : * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 : *
14 : * In either mode, stop upon hitting newline or end of string.
15 : *
16 : * Note that this lexer operates within the framework created by psqlscan.l.
17 : *
18 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * src/bin/pgbench/exprscan.l
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres_fe.h"
26 :
27 : /*
28 : * NB: include exprparse.h only AFTER including pgbench.h, because pgbench.h
29 : * contains definitions needed for YYSTYPE. Likewise, pgbench.h must come after
30 : * psqlscan_int.h for yyscan_t.
31 : */
32 : #include "fe_utils/psqlscan_int.h"
33 : #include "pgbench.h"
34 : #include "exprparse.h"
35 : }
36 :
37 : %{
38 : /* context information for reporting errors in expressions */
39 : static const char *expr_source = NULL;
40 : static int expr_lineno = 0;
41 : static int expr_start_offset = 0;
42 : static const char *expr_command = NULL;
43 :
44 : /* indicates whether last yylex() call read a newline */
45 : static bool last_was_newline = false;
46 :
47 : /*
48 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
49 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
50 : * this would cause warnings. Providing our own declarations should be
51 : * harmless even when the bug gets fixed.
52 : */
53 : extern int expr_yyget_column(yyscan_t yyscanner);
54 : extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
55 :
56 : /* LCOV_EXCL_START */
57 :
58 : %}
59 :
60 : /* Except for the prefix, these options should match psqlscan.l */
61 : %option reentrant
62 : %option bison-bridge
63 : %option 8bit
64 : %option never-interactive
65 : %option nodefault
66 : %option noinput
67 : %option nounput
68 : %option noyywrap
69 : %option warn
70 : %option prefix="expr_yy"
71 :
72 : /* Character classes */
73 : alpha [a-zA-Z\200-\377_]
74 : digit [0-9]
75 : alnum [A-Za-z\200-\377_0-9]
76 : /* {space} + {nonspace} + {newline} should cover all characters */
77 : space [ \t\r\f\v]
78 : nonspace [^ \t\r\f\v\n]
79 : newline [\n]
80 :
81 : /* Line continuation marker */
82 : continuation \\\r?{newline}
83 :
84 : /* case insensitive keywords */
85 : and [Aa][Nn][Dd]
86 : or [Oo][Rr]
87 : not [Nn][Oo][Tt]
88 : case [Cc][Aa][Ss][Ee]
89 : when [Ww][Hh][Ee][Nn]
90 : then [Tt][Hh][Ee][Nn]
91 : else [Ee][Ll][Ss][Ee]
92 : end [Ee][Nn][Dd]
93 : true [Tt][Rr][Uu][Ee]
94 : false [Ff][Aa][Ll][Ss][Ee]
95 : null [Nn][Uu][Ll][Ll]
96 : is [Ii][Ss]
97 : isnull [Ii][Ss][Nn][Uu][Ll][Ll]
98 : notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
99 :
100 : /* Exclusive states */
101 : %x EXPR
102 :
103 : %%
104 :
105 : %{
106 : /* Declare some local variables inside yylex(), for convenience */
107 : PsqlScanState cur_state = yyextra;
2576 tgl 108 GIC 4957 :
109 : /*
110 : * Force flex into the state indicated by start_state. This has a
111 : * couple of purposes: it lets some of the functions below set a new
112 : * starting state without ugly direct access to flex variables, and it
113 : * allows us to transition from one flex lexer to another so that we
114 : * can lex different parts of the source string using separate lexers.
115 : */
116 : BEGIN(cur_state->start_state);
117 4957 :
2576 tgl 118 ECB : /* Reset was-newline flag */
119 : last_was_newline = false;
2577 tgl 120 GIC 4957 : %}
121 :
122 : /* INITIAL state */
123 :
124 : {nonspace}+ {
125 : /* Found a word, emit and return it */
126 : psqlscan_emit(cur_state, yytext, yyleng);
2576 tgl 127 CBC 1119 : return 1;
2576 tgl 128 GIC 1119 : }
129 :
2270 tgl 130 ECB : /*
131 : * We need this rule to avoid returning "word\" instead of recognizing
132 : * a continuation marker just after a word:
133 : */
134 : {nonspace}+{continuation} {
1665 tgl 135 GIC 1 : /* Found "word\\\r?\n", emit and return just "word" */
136 : int wordlen = yyleng - 2;
1665 tgl 137 CBC 1 : if (yytext[wordlen] == '\r')
138 1 : wordlen--;
1665 tgl 139 UIC 0 : Assert(yytext[wordlen] == '\\');
1665 tgl 140 GIC 1 : psqlscan_emit(cur_state, yytext, wordlen);
2270 141 1 : return 1;
142 1 : }
143 :
144 : {space}+ { /* ignore */ }
2576 tgl 145 CBC 654 :
2270 tgl 146 GIC 654 : {continuation} { /* ignore */ }
2270 tgl 147 CBC 1 :
2576 148 1 : {newline} {
2576 tgl 149 GBC 58 : /* report end of command */
2576 tgl 150 ECB : last_was_newline = true;
2576 tgl 151 CBC 58 : return 0;
152 58 : }
153 :
154 : /* EXPR state */
2576 tgl 155 ECB :
156 : <EXPR>{
2576 tgl 157 CBC 53 :
2576 tgl 158 ECB : "+" { return '+'; }
2576 tgl 159 CBC 53 : "-" { return '-'; }
2576 tgl 160 GIC 65 : "*" { return '*'; }
2576 tgl 161 CBC 65 : "/" { return '/'; }
1916 teodor 162 204 : "%" { return '%'; } /* C version, also in Pg SQL */
1916 teodor 163 GIC 12 : "=" { return '='; }
164 2 : "<>" { return NE_OP; }
165 31 : "!=" { return NE_OP; } /* C version, also in Pg SQL */
166 5 : "<=" { return LE_OP; }
1916 teodor 167 CBC 3 : ">=" { return GE_OP; }
1916 teodor 168 GIC 4 : "<<" { return LS_OP; }
1916 teodor 169 CBC 3 : ">>" { return RS_OP; }
170 7 : "<" { return '<'; }
171 1 : ">" { return '>'; }
172 10 : "|" { return '|'; }
173 6 : "&" { return '&'; }
174 2 : "#" { return '#'; }
175 1 : "~" { return '~'; }
176 1 :
2576 tgl 177 2 : "(" { return '('; }
178 451 : ")" { return ')'; }
179 453 : "," { return ','; }
2933 180 453 :
1916 teodor 181 343 : {and} { return AND_OP; }
182 44 : {or} { return OR_OP; }
183 5 : {not} { return NOT_OP; }
184 5 : {is} { return IS_OP; }
185 15 : {isnull} { return ISNULL_OP; }
186 8 : {notnull} { return NOTNULL_OP; }
187 1 :
188 1 : {case} { return CASE_KW; }
189 15 : {when} { return WHEN_KW; }
190 16 : {then} { return THEN_KW; }
191 16 : {else} { return ELSE_KW; }
192 17 : {end} { return END_KW; }
193 11 :
2590 tgl 194 15 : :{alnum}+ {
2576 195 269 : yylval->str = pg_strdup(yytext + 1);
2933 196 269 : return VARIABLE;
197 269 : }
1916 teodor 198 ECB :
199 : {null} { return NULL_CONST; }
1916 teodor 200 CBC 12 : {true} {
1916 teodor 201 ECB : yylval->bval = true;
1916 teodor 202 CBC 14 : return BOOLEAN_CONST;
203 14 : }
1916 teodor 204 ECB : {false} {
205 : yylval->bval = false;
1916 teodor 206 CBC 9 : return BOOLEAN_CONST;
207 9 : }
208 : "9223372036854775808" {
209 : /*
1655 andres 210 1 : * Special handling for PG_INT64_MIN, which can't
211 : * accurately be represented here, as the minus sign is
1655 andres 212 ECB : * lexed separately and INT64_MIN can't be represented as
213 : * a positive integer.
214 : */
215 : return MAXINT_PLUS_ONE_CONST;
1655 andres 216 CBC 1 : }
2590 tgl 217 ECB : {digit}+ {
218 : if (!strtoint64(yytext, true, &yylval->ival))
1655 andres 219 GIC 794 : expr_yyerror_more(yyscanner, "bigint constant overflow",
1655 andres 220 CBC 1 : strdup(yytext));
2567 tgl 221 GIC 1 : return INTEGER_CONST;
2933 222 793 : }
223 : {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? {
224 : if (!strtodouble(yytext, true, &yylval->dval))
1655 andres 225 61 : expr_yyerror_more(yyscanner, "double constant overflow",
1655 andres 226 CBC 1 : strdup(yytext));
2567 tgl 227 GIC 1 : return DOUBLE_CONST;
228 60 : }
2567 tgl 229 ECB : \.{digit}+([eE][-+]?{digit}+)? {
1655 andres 230 : if (!strtodouble(yytext, true, &yylval->dval))
1655 andres 231 CBC 2 : expr_yyerror_more(yyscanner, "double constant overflow",
232 1 : strdup(yytext));
2567 tgl 233 GIC 1 : return DOUBLE_CONST;
2568 rhaas 234 1 : }
2590 tgl 235 ECB : {alpha}{alnum}* {
2576 236 : yylval->str = pg_strdup(yytext);
2595 rhaas 237 CBC 399 : return FUNCTION;
238 399 : }
239 :
240 : {space}+ { /* ignore */ }
2270 tgl 241 1639 :
242 1639 : {continuation} { /* ignore */ }
243 17 :
2576 244 17 : {newline} {
2576 tgl 245 GIC 353 : /* report end of command */
246 : last_was_newline = true;
2576 tgl 247 CBC 353 : return 0;
248 353 : }
249 :
250 : . {
251 1 : /*
2576 tgl 252 ECB : * must strdup yytext so that expr_yyerror_more doesn't
253 : * change it while finding end of line
254 : */
255 : expr_yyerror_more(yyscanner, "unexpected character",
2576 tgl 256 GIC 1 : pg_strdup(yytext));
2576 tgl 257 CBC 1 : /* NOTREACHED, syntax_error calls exit() */
2577 tgl 258 ECB : return 0;
259 : }
260 :
2576 261 : }
262 :
2576 tgl 263 GIC 57 : <<EOF>> {
264 : if (cur_state->buffer_stack == NULL)
265 57 : return 0; /* end of input reached */
2576 tgl 266 CBC 57 :
2576 tgl 267 ECB : /*
268 : * We were expanding a variable, so pop the inclusion
269 : * stack and keep lexing
270 : */
271 : psqlscan_pop_buffer_stack(cur_state);
2576 tgl 272 UIC 0 : psqlscan_select_top_buffer(cur_state);
2576 tgl 273 LBC 0 : }
274 :
2960 rhaas 275 0 : %%
276 0 :
277 : /* LCOV_EXCL_STOP */
278 :
279 : void
280 : expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
2960 rhaas 281 GIC 19 : {
2576 tgl 282 EUB : PsqlScanState state = yyget_extra(yyscanner);
2576 tgl 283 GBC 19 : int error_detection_offset = expr_scanner_offset(state) - 1;
2576 tgl 284 GIC 19 : YYSTYPE lval;
2576 tgl 285 EUB : char *full_line;
286 :
287 : /*
288 : * While parsing an expression, we may not have collected the whole line
289 : * yet from the input source. Lex till EOL so we can report whole line.
290 : * (If we're at EOF, it's okay to call yylex() an extra time.)
2576 tgl 291 ECB : */
292 : if (!last_was_newline)
2576 tgl 293 CBC 19 : {
2576 tgl 294 ECB : while (yylex(&lval, yyscanner))
2576 tgl 295 GIC 25 : /* skip */ ;
296 : }
297 :
298 : /* Extract the line, trimming trailing newline if any */
299 : full_line = expr_scanner_get_substring(state,
300 19 : expr_start_offset,
301 : expr_scanner_offset(state),
302 : true);
2576 tgl 303 ECB :
304 : syntax_error(expr_source, expr_lineno, full_line, expr_command,
2576 tgl 305 CBC 19 : message, more, error_detection_offset - expr_start_offset);
306 : }
307 :
308 : void
309 : expr_yyerror(yyscan_t yyscanner, const char *message)
2595 rhaas 310 6 : {
311 : expr_yyerror_more(yyscanner, message, NULL);
2960 rhaas 312 GIC 6 : }
313 :
314 : /*
2576 tgl 315 ECB : * Collect a space-separated word from a backslash command and return it
316 : * in word_buf, along with its starting string offset in *offset.
317 : * Returns true if successful, false if at end of command.
318 : */
319 : bool
320 : expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
2960 rhaas 321 GIC 1202 : {
2576 tgl 322 ECB : int lexresult;
323 : YYSTYPE lval;
324 :
325 : /* Must be scanning already */
326 : Assert(state->scanbufhandle != NULL);
2577 tgl 327 GIC 1202 :
328 : /* Set current output target */
329 : state->output_buf = word_buf;
2576 330 1202 : resetPQExpBuffer(word_buf);
2576 tgl 331 CBC 1202 :
332 : /* Set input source */
333 : if (state->buffer_stack != NULL)
2576 tgl 334 GIC 1202 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
2576 tgl 335 UIC 0 : else
336 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
2929 rhaas 337 CBC 1202 :
338 : /* Set start state */
339 : state->start_state = INITIAL;
2576 tgl 340 1202 :
2576 tgl 341 ECB : /* And lex. */
342 : lexresult = yylex(&lval, state->scanner);
2595 rhaas 343 GIC 1202 :
2960 rhaas 344 ECB : /*
2576 tgl 345 EUB : * Save start offset of word, if any. We could do this more efficiently,
346 : * but for now this seems fine.
2960 rhaas 347 ECB : */
348 : if (lexresult)
2576 tgl 349 GIC 1202 : *offset = expr_scanner_offset(state) - word_buf->len;
2576 tgl 350 CBC 1120 : else
351 : *offset = -1;
2960 rhaas 352 GIC 82 :
2576 tgl 353 ECB : /*
354 : * In case the caller returns to using the regular SQL lexer, reselect the
355 : * appropriate initial state.
356 : */
357 : psql_scan_reselect_sql_lexer(state);
2576 tgl 358 GIC 1202 :
2576 tgl 359 ECB : return (bool) lexresult;
2960 rhaas 360 CBC 1202 : }
361 :
2576 tgl 362 ECB : /*
363 : * Prepare to lex an expression via expr_yyparse().
364 : *
365 : * Returns the yyscan_t that is to be passed to expr_yyparse().
366 : * (This is just state->scanner, but callers don't need to know that.)
367 : */
368 : yyscan_t
369 : expr_scanner_init(PsqlScanState state,
2576 tgl 370 CBC 384 : const char *source, int lineno, int start_offset,
371 : const char *command)
372 : {
373 : /* Save error context info */
374 : expr_source = source;
2576 tgl 375 GIC 384 : expr_lineno = lineno;
376 384 : expr_start_offset = start_offset;
377 384 : expr_command = command;
378 384 :
379 : /* Must be scanning already */
2576 tgl 380 ECB : Assert(state->scanbufhandle != NULL);
2576 tgl 381 GIC 384 :
382 : /* Set current output target */
383 : state->output_buf = NULL;
384 384 :
2576 tgl 385 ECB : /* Set input source */
386 : if (state->buffer_stack != NULL)
2576 tgl 387 CBC 384 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
2576 tgl 388 LBC 0 : else
389 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
2576 tgl 390 GIC 384 :
2576 tgl 391 ECB : /* Set start state */
392 : state->start_state = EXPR;
2576 tgl 393 GIC 384 :
2576 tgl 394 ECB : return state->scanner;
2576 tgl 395 GIC 384 : }
396 :
2960 rhaas 397 ECB : /*
2576 tgl 398 EUB : * Finish lexing an expression.
399 : */
2960 rhaas 400 ECB : void
401 : expr_scanner_finish(yyscan_t yyscanner)
2960 rhaas 402 GIC 365 : {
2576 tgl 403 ECB : PsqlScanState state = yyget_extra(yyscanner);
2576 tgl 404 GIC 365 :
2576 tgl 405 ECB : /*
406 : * Reselect appropriate initial state for SQL lexer.
407 : */
408 : psql_scan_reselect_sql_lexer(state);
2576 tgl 409 GIC 365 : }
410 365 :
411 : /*
2576 tgl 412 ECB : * Get offset from start of string to end of current lexer token.
413 : *
414 : * We rely on the knowledge that flex modifies the scan buffer by storing
415 : * a NUL at the end of the current token (yytext). Note that this might
416 : * not work quite right if we were parsing a sub-buffer, but since pgbench
417 : * never invokes that functionality, it doesn't matter.
418 : */
419 : int
420 : expr_scanner_offset(PsqlScanState state)
2576 tgl 421 GIC 2333 : {
422 : return strlen(state->scanbuf);
423 2333 : }
424 :
425 : /*
426 : * Get a malloc'd copy of the lexer input string from start_offset
427 : * to just before end_offset. If chomp is true, drop any trailing
428 : * newline(s).
429 : */
430 : char *
2576 tgl 431 ECB : expr_scanner_get_substring(PsqlScanState state,
2043 tgl 432 GIC 465 : int start_offset, int end_offset,
2043 tgl 433 ECB : bool chomp)
434 : {
435 : char *result;
436 : const char *scanptr = state->scanbuf + start_offset;
2576 tgl 437 GIC 465 : int slen = end_offset - start_offset;
438 465 :
439 : Assert(slen >= 0);
440 465 : Assert(end_offset <= strlen(state->scanbuf));
2043 441 465 :
2043 tgl 442 ECB : if (chomp)
2043 tgl 443 GIC 465 : {
444 : while (slen > 0 &&
445 876 : (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
446 876 : slen--;
2043 tgl 447 CBC 411 : }
2043 tgl 448 ECB :
449 : result = (char *) pg_malloc(slen + 1);
2043 tgl 450 CBC 465 : memcpy(result, scanptr, slen);
2576 451 465 : result[slen] = '\0';
2576 tgl 452 GIC 465 :
2576 tgl 453 ECB : return result;
2576 tgl 454 GIC 465 : }
2576 tgl 455 ECB :
456 : /*
457 : * Get the line number associated with the given string offset
458 : * (which must not be past the end of where we've lexed to).
459 : */
460 : int
461 : expr_scanner_get_lineno(PsqlScanState state, int offset)
2576 tgl 462 CBC 1502 : {
463 : int lineno = 1;
464 1502 : const char *p = state->scanbuf;
2576 tgl 465 GIC 1502 :
466 : while (*p && offset > 0)
467 773365 : {
468 : if (*p == '\n')
469 771863 : lineno++;
470 20049 : p++, offset--;
471 771863 : }
2576 tgl 472 ECB : return lineno;
2960 rhaas 473 GIC 1502 : }
|