TLA Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * exprscan.l
5 : * lexical scanner for pgbench backslash commands
6 : *
7 : * This lexer supports two operating modes:
8 : *
9 : * In INITIAL state, just parse off whitespace-separated words (this mode
10 : * is basically equivalent to strtok(), which is what we used to use).
11 : *
12 : * In EXPR state, lex for the simple expression syntax of exprparse.y.
13 : *
14 : * In either mode, stop upon hitting newline or end of string.
15 : *
16 : * Note that this lexer operates within the framework created by psqlscan.l; it reuses that lexer's PsqlScanState and buffer-stack support (see psqlscan_int.h).
17 : *
18 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
19 : * Portions Copyright (c) 1994, Regents of the University of California
20 : *
21 : * src/bin/pgbench/exprscan.l
22 : *
23 : *-------------------------------------------------------------------------
24 : */
25 : #include "postgres_fe.h"
26 :
27 : /*
28 : * NB: include exprparse.h only AFTER including pgbench.h, because pgbench.h
29 : * contains definitions needed for YYSTYPE. Likewise, pgbench.h must come after
30 : * psqlscan_int.h for yyscan_t.
31 : */
32 : #include "fe_utils/psqlscan_int.h"
33 : #include "pgbench.h"
34 : #include "exprparse.h"
35 : }
36 :
37 : %{
38 : /* Context for error reports; set by expr_scanner_init(), read by expr_yyerror_more() */
39 : static const char *expr_source = NULL; /* passed through to syntax_error() */
40 : static int expr_lineno = 0; /* passed through to syntax_error() */
41 : static int expr_start_offset = 0; /* scan-buffer offset where the reported line begins */
42 : static const char *expr_command = NULL; /* passed through to syntax_error() */
43 :
44 : /* True if the last yylex() call read a newline; cleared on every entry to yylex() */
45 : static bool last_was_newline = false;
46 :
47 : /*
48 : * Work around a bug in flex 2.5.35: it emits a couple of functions that
49 : * it forgets to emit declarations for. Since we use -Wmissing-prototypes,
50 : * this would cause warnings. Providing our own declarations should be
51 : * harmless even when the bug gets fixed.
52 : */
53 : extern int expr_yyget_column(yyscan_t yyscanner);
54 : extern void expr_yyset_column(int column_no, yyscan_t yyscanner);
55 :
56 : /* LCOV_EXCL_START */
57 :
58 : %}
59 :
60 : /* Except for the prefix, these options should match psqlscan.l */
61 : %option reentrant
62 : %option bison-bridge
63 : %option 8bit
64 : %option never-interactive
65 : %option nodefault
66 : %option noinput
67 : %option nounput
68 : %option noyywrap
69 : %option warn
70 : %option prefix="expr_yy"
71 :
72 : /* Character classes; bytes \200-\377 and '_' count as letters */
73 : alpha [a-zA-Z\200-\377_]
74 : digit [0-9]
75 : alnum [A-Za-z\200-\377_0-9]
76 : /* {space} + {nonspace} + {newline} should cover all characters */
77 : space [ \t\r\f\v]
78 : nonspace [^ \t\r\f\v\n]
79 : newline [\n]
80 :
81 : /* Line continuation marker: backslash, optional carriage return, newline */
82 : continuation \\\r?{newline}
83 :
84 : /* Case-insensitive keywords, written as one bracket expression per letter */
85 : and [Aa][Nn][Dd]
86 : or [Oo][Rr]
87 : not [Nn][Oo][Tt]
88 : case [Cc][Aa][Ss][Ee]
89 : when [Ww][Hh][Ee][Nn]
90 : then [Tt][Hh][Ee][Nn]
91 : else [Ee][Ll][Ss][Ee]
92 : end [Ee][Nn][Dd]
93 : true [Tt][Rr][Uu][Ee]
94 : false [Ff][Aa][Ll][Ss][Ee]
95 : null [Nn][Uu][Ll][Ll]
96 : is [Ii][Ss]
97 : isnull [Ii][Ss][Nn][Uu][Ll][Ll]
98 : notnull [Nn][Oo][Tt][Nn][Uu][Ll][Ll]
99 :
100 : /* Exclusive states: while in EXPR, only rules tagged <EXPR> match input text */
101 : %x EXPR
102 :
103 : %%
104 :
105 : %{
106 : /* This block is placed at the top of yylex() and runs on every call. Declare some local variables, for convenience */
107 : PsqlScanState cur_state = yyextra; /* the state object attached to this scanner */
108 GIC 4957 :
109 : /*
110 : * Force flex into the state indicated by start_state. This has a
111 : * couple of purposes: it lets some of the functions below set a new
112 : * starting state without ugly direct access to flex variables, and it
113 : * allows us to transition from one flex lexer to another so that we
114 : * can lex different parts of the source string using separate lexers.
115 : */
116 : BEGIN(cur_state->start_state);
117 4957 :
118 ECB : /* Reset was-newline flag; only the {newline} rules set it again */
119 : last_was_newline = false;
120 GIC 4957 : %}
121 :
122 : /* INITIAL state: split the input into whitespace-separated words */
123 :
124 : {nonspace}+ {
125 : /* Found a word, emit and return it */
126 : psqlscan_emit(cur_state, yytext, yyleng);
127 CBC 1119 : return 1; /* nonzero: expr_lex_one_word() treats this as "word found" */
128 GIC 1119 : }
129 :
130 ECB : /*
131 : * We need this rule to avoid returning "word\" instead of recognizing
132 : * a continuation marker just after a word (being the longer match, it wins over the rule above):
133 : */
134 : {nonspace}+{continuation} {
135 GIC 1 : /* Found "word\\\r?\n", emit and return just "word" */
136 : int wordlen = yyleng - 2; /* drop the backslash and the newline */
137 CBC 1 : if (yytext[wordlen] == '\r') /* marker was backslash-CR-newline */
138 1 : wordlen--; /* ... so drop the CR as well */
139 UIC 0 : Assert(yytext[wordlen] == '\\');
140 GIC 1 : psqlscan_emit(cur_state, yytext, wordlen);
141 1 : return 1;
142 1 : }
143 :
144 : {space}+ { /* ignore */ }
145 CBC 654 :
146 GIC 654 : {continuation} { /* ignore */ }
147 CBC 1 :
148 1 : {newline} {
149 GBC 58 : /* report end of command */
150 ECB : last_was_newline = true;
151 CBC 58 : return 0; /* zero: expr_lex_one_word() treats this as "no word" */
152 58 : }
153 :
154 : /* EXPR state: tokens of the expression syntax of exprparse.y */
155 ECB :
156 : <EXPR>{
157 CBC 53 :
158 ECB : "+" { return '+'; }
159 CBC 53 : "-" { return '-'; }
160 GIC 65 : "*" { return '*'; }
161 CBC 65 : "/" { return '/'; }
162 204 : "%" { return '%'; } /* C version, also in Pg SQL */
163 GIC 12 : "=" { return '='; }
164 2 : "<>" { return NE_OP; }
165 31 : "!=" { return NE_OP; } /* C version, also in Pg SQL */
166 5 : "<=" { return LE_OP; }
167 CBC 3 : ">=" { return GE_OP; }
168 GIC 4 : "<<" { return LS_OP; }
169 CBC 3 : ">>" { return RS_OP; }
170 7 : "<" { return '<'; }
171 1 : ">" { return '>'; }
172 10 : "|" { return '|'; }
173 6 : "&" { return '&'; }
174 2 : "#" { return '#'; }
175 1 : "~" { return '~'; }
176 1 :
177 2 : "(" { return '('; }
178 451 : ")" { return ')'; }
179 453 : "," { return ','; }
180 453 :
181 343 : {and} { return AND_OP; }
182 44 : {or} { return OR_OP; }
183 5 : {not} { return NOT_OP; }
184 5 : {is} { return IS_OP; }
185 15 : {isnull} { return ISNULL_OP; }
186 8 : {notnull} { return NOTNULL_OP; }
187 1 :
188 1 : {case} { return CASE_KW; }
189 15 : {when} { return WHEN_KW; }
190 16 : {then} { return THEN_KW; }
191 16 : {else} { return ELSE_KW; }
192 17 : {end} { return END_KW; }
193 11 :
194 15 : :{alnum}+ { /* variable reference; the name is stored without its ':' */
195 269 : yylval->str = pg_strdup(yytext + 1);
196 269 : return VARIABLE;
197 269 : }
198 ECB :
199 : {null} { return NULL_CONST; }
200 CBC 12 : {true} {
201 ECB : yylval->bval = true;
202 CBC 14 : return BOOLEAN_CONST;
203 14 : }
204 ECB : {false} {
205 : yylval->bval = false;
206 CBC 9 : return BOOLEAN_CONST;
207 9 : }
208 : "9223372036854775808" {
209 : /*
210 1 : * Special handling for PG_INT64_MIN, which can't
211 : * accurately be represented here, as the minus sign is
212 ECB : * lexed separately and INT64_MIN can't be represented as
213 : * a positive integer. Listed before {digit}+ so that it wins the equal-length match.
214 : */
215 : return MAXINT_PLUS_ONE_CONST;
216 CBC 1 : }
217 ECB : {digit}+ { /* precedes the double rule, so an all-digit token is an integer */
218 : if (!strtoint64(yytext, true, &yylval->ival))
219 GIC 794 : expr_yyerror_more(yyscanner, "bigint constant overflow",
220 CBC 1 : pg_strdup(yytext)); /* copy: the error path lexes on and changes yytext */
221 GIC 1 : return INTEGER_CONST;
222 793 : }
223 : {digit}+(\.{digit}*)?([eE][-+]?{digit}+)? {
224 : if (!strtodouble(yytext, true, &yylval->dval))
225 61 : expr_yyerror_more(yyscanner, "double constant overflow",
226 CBC 1 : pg_strdup(yytext)); /* copy, as in the integer rule */
227 GIC 1 : return DOUBLE_CONST;
228 60 : }
229 ECB : \.{digit}+([eE][-+]?{digit}+)? { /* double written with no digits before the point */
230 : if (!strtodouble(yytext, true, &yylval->dval))
231 CBC 2 : expr_yyerror_more(yyscanner, "double constant overflow",
232 1 : pg_strdup(yytext)); /* copy, as in the integer rule */
233 GIC 1 : return DOUBLE_CONST;
234 1 : }
235 ECB : {alpha}{alnum}* { /* any other identifier; the keyword rules above win equal-length ties */
236 : yylval->str = pg_strdup(yytext);
237 CBC 399 : return FUNCTION;
238 399 : }
239 :
240 : {space}+ { /* ignore */ }
241 1639 :
242 1639 : {continuation} { /* ignore */ }
243 17 :
244 17 : {newline} {
245 GIC 353 : /* report end of command */
246 : last_was_newline = true;
247 CBC 353 : return 0;
248 353 : }
249 :
250 : . { /* catch-all for any character no rule above accepts */
251 1 : /*
252 ECB : * must strdup yytext so that expr_yyerror_more doesn't
253 : * change it while finding end of line
254 : */
255 : expr_yyerror_more(yyscanner, "unexpected character",
256 GIC 1 : pg_strdup(yytext));
257 CBC 1 : /* NOTREACHED, syntax_error calls exit() */
258 ECB : return 0;
259 : }
260 :
261 : }
262 :
263 GIC 57 : <<EOF>> {
264 : if (cur_state->buffer_stack == NULL) /* no stacked buffer is active */
265 57 : return 0; /* end of input reached */
266 CBC 57 :
267 ECB : /*
268 : * We were expanding a variable, so pop the inclusion
269 : * stack and keep lexing (no return here; presumably the call below switches flex to the next buffer)
270 : */
271 : psqlscan_pop_buffer_stack(cur_state);
272 UIC 0 : psqlscan_select_top_buffer(cur_state);
273 LBC 0 : }
274 :
275 0 : %%
276 0 :
277 : /* LCOV_EXCL_STOP */
278 :
/*
 * Report a syntax error found while lexing or parsing an expression.
 *
 * "message" is the error text and "more" is optional extra detail
 * (expr_yyerror() passes NULL). Both are handed to syntax_error() along
 * with the saved error context, the whole source line, and the error
 * position relative to the start of that line. A comment in the "." rule
 * says syntax_error() calls exit(), so this presumably does not return.
 */
279 : void
280 : expr_yyerror_more(yyscan_t yyscanner, const char *message, const char *more)
281 GIC 19 : {
282 EUB : PsqlScanState state = yyget_extra(yyscanner);
283 GBC 19 : int error_detection_offset = expr_scanner_offset(state) - 1; /* taken now, before the loop below moves the token end */
284 GIC 19 : YYSTYPE lval; /* scratch target for the skipped tokens */
285 EUB : char *full_line;
286 :
287 : /*
288 : * While parsing an expression, we may not have collected the whole line
289 : * yet from the input source. Lex till EOL so we can report whole line.
290 : * (If we're at EOF, it's okay to call yylex() an extra time.)
291 ECB : */
292 : if (!last_was_newline)
293 CBC 19 : {
294 ECB : while (yylex(&lval, yyscanner))
295 GIC 25 : /* skip */ ;
296 : }
297 :
298 : /* Extract the line, trimming trailing newline if any */
299 : full_line = expr_scanner_get_substring(state,
300 19 : expr_start_offset,
301 : expr_scanner_offset(state),
302 : true);
303 ECB :
304 : syntax_error(expr_source, expr_lineno, full_line, expr_command,
305 CBC 19 : message, more, error_detection_offset - expr_start_offset); /* last argument: error position within full_line */
306 : }
307 :
/*
 * Report an expression syntax error that has no extra detail: forwards to
 * expr_yyerror_more() with "more" set to NULL.
 */
308 : void
309 : expr_yyerror(yyscan_t yyscanner, const char *message)
310 6 : {
311 : expr_yyerror_more(yyscanner, message, NULL);
312 GIC 6 : }
313 :
314 : /*
315 ECB : * Collect a space-separated word from a backslash command and return it
316 : * in word_buf, along with its starting string offset in *offset.
317 : * Returns true if successful, false if at end of command (*offset is then set to -1).
318 : */
319 : bool
320 : expr_lex_one_word(PsqlScanState state, PQExpBuffer word_buf, int *offset)
321 GIC 1202 : {
322 ECB : int lexresult;
323 : YYSTYPE lval; /* required by yylex(); not used by this function */
324 :
325 : /* Must be scanning already */
326 : Assert(state->scanbufhandle != NULL);
327 GIC 1202 :
328 : /* Set current output target */
329 : state->output_buf = word_buf;
330 1202 : resetPQExpBuffer(word_buf);
331 CBC 1202 :
332 : /* Set input source */
333 : if (state->buffer_stack != NULL)
334 GIC 1202 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
335 UIC 0 : else
336 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
337 CBC 1202 :
338 : /* Set start state; yylex() applies it with BEGIN on entry */
339 : state->start_state = INITIAL;
340 1202 :
341 ECB : /* And lex. */
342 : lexresult = yylex(&lval, state->scanner);
343 GIC 1202 :
344 ECB : /*
345 EUB : * Save start offset of word, if any. We could do this more efficiently,
346 : * but for now this seems fine.
 * NOTE(review): when the word was matched by the {nonspace}+{continuation}
 * rule, the token end includes the continuation marker, which is not in
 * word_buf, so the start offset computed here looks too large by the
 * marker's length -- confirm.
347 ECB : */
348 : if (lexresult)
349 GIC 1202 : *offset = expr_scanner_offset(state) - word_buf->len; /* token end minus word length */
350 CBC 1120 : else
351 : *offset = -1;
352 GIC 82 :
353 ECB : /*
354 : * In case the caller returns to using the regular SQL lexer, reselect the
355 : * appropriate initial state.
356 : */
357 : psql_scan_reselect_sql_lexer(state);
358 GIC 1202 :
359 ECB : return (bool) lexresult;
360 CBC 1202 : }
361 :
362 ECB : /*
363 : * Prepare to lex an expression via expr_yyparse().
364 : *
 * source, lineno, start_offset and command are only saved here; they are
 * used later if expr_yyerror_more() has to report an error.
 *
365 : * Returns the yyscan_t that is to be passed to expr_yyparse().
366 : * (This is just state->scanner, but callers don't need to know that.)
367 : */
368 : yyscan_t
369 : expr_scanner_init(PsqlScanState state,
370 CBC 384 : const char *source, int lineno, int start_offset,
371 : const char *command)
372 : {
373 : /* Save error context info (file-level statics, so shared by every scanner) */
374 : expr_source = source;
375 GIC 384 : expr_lineno = lineno;
376 384 : expr_start_offset = start_offset;
377 384 : expr_command = command;
378 384 :
379 : /* Must be scanning already */
380 ECB : Assert(state->scanbufhandle != NULL);
381 GIC 384 :
382 : /* No output target: the EXPR rules never call psqlscan_emit() */
383 : state->output_buf = NULL;
384 384 :
385 ECB : /* Set input source */
386 : if (state->buffer_stack != NULL)
387 CBC 384 : yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
388 LBC 0 : else
389 : yy_switch_to_buffer(state->scanbufhandle, state->scanner);
390 GIC 384 :
391 ECB : /* Set start state; yylex() applies it with BEGIN on entry */
392 : state->start_state = EXPR;
393 GIC 384 :
394 ECB : return state->scanner;
395 GIC 384 : }
396 :
397 ECB : /*
398 EUB : * Finish lexing an expression. yyscanner is the scanner handle (what expr_scanner_init() returns).
399 : */
400 ECB : void
401 : expr_scanner_finish(yyscan_t yyscanner)
402 GIC 365 : {
403 ECB : PsqlScanState state = yyget_extra(yyscanner); /* state object attached to the scanner */
404 GIC 365 :
405 ECB : /*
406 : * Reselect appropriate initial state for SQL lexer.
407 : */
408 : psql_scan_reselect_sql_lexer(state);
409 GIC 365 : }
410 365 :
411 : /*
412 ECB : * Get offset from start of string to end of current lexer token.
413 : *
414 : * We rely on the knowledge that flex modifies the scan buffer by storing
415 : * a NUL at the end of the current token (yytext). Note that this might
416 : * not work quite right if we were parsing a sub-buffer, but since pgbench
417 : * never invokes that functionality, it doesn't matter.
 *
 * Each call rescans the buffer from its start with strlen().
418 : */
419 : int
420 : expr_scanner_offset(PsqlScanState state)
421 GIC 2333 : {
422 : return strlen(state->scanbuf); /* size_t result is implicitly converted to int */
423 2333 : }
424 :
425 : /*
426 : * Get a malloc'd copy of the lexer input string from start_offset
427 : * to just before end_offset. If chomp is true, drop any trailing
428 : * newline and carriage-return characters. The copy is NUL-terminated and allocated with pg_malloc().
429 : */
430 : char *
431 ECB : expr_scanner_get_substring(PsqlScanState state,
432 GIC 465 : int start_offset, int end_offset,
433 ECB : bool chomp)
434 : {
435 : char *result;
436 : const char *scanptr = state->scanbuf + start_offset; /* first byte to copy */
437 GIC 465 : int slen = end_offset - start_offset; /* number of bytes to copy */
438 465 :
439 : Assert(slen >= 0);
440 465 : Assert(end_offset <= strlen(state->scanbuf)); /* i.e. not past the NUL flex stores at the current token end */
441 465 :
442 ECB : if (chomp)
443 GIC 465 : {
444 : while (slen > 0 &&
445 876 : (scanptr[slen - 1] == '\n' || scanptr[slen - 1] == '\r'))
446 876 : slen--; /* shorten the copy; the scan buffer itself is not modified */
447 CBC 411 : }
448 ECB :
449 : result = (char *) pg_malloc(slen + 1); /* +1 for the terminating NUL */
450 CBC 465 : memcpy(result, scanptr, slen);
451 465 : result[slen] = '\0';
452 GIC 465 :
453 ECB : return result;
454 GIC 465 : }
455 ECB :
456 : /*
457 : * Get the line number associated with the given string offset
458 : * (which must not be past the end of where we've lexed to). Lines are numbered from 1: the result is one plus the count of '\n' bytes before offset.
459 : */
460 : int
461 : expr_scanner_get_lineno(PsqlScanState state, int offset)
462 CBC 1502 : {
463 : int lineno = 1;
464 1502 : const char *p = state->scanbuf; /* always scans from the start of the buffer */
465 GIC 1502 :
466 : while (*p && offset > 0) /* stop at offset, or earlier at a NUL */
467 773365 : {
468 : if (*p == '\n')
469 771863 : lineno++;
470 20049 : p++, offset--;
471 771863 : }
472 ECB : return lineno;
473 GIC 1502 : }
|