TLA Line data Source code
1 : %top{
2 : /*-------------------------------------------------------------------------
3 : *
4 : * pgc.l
5 : * lexical scanner for ecpg
6 : *
7 : * This is a modified version of src/backend/parser/scan.l
8 : *
9 : * The ecpg scanner is not backup-free, so the fail rules are
10 : * only here to simplify syncing this file with scan.l.
11 : *
12 : *
13 : * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
14 : * Portions Copyright (c) 1994, Regents of the University of California
15 : *
16 : * IDENTIFICATION
17 : * src/interfaces/ecpg/preproc/pgc.l
18 : *
19 : *-------------------------------------------------------------------------
20 : */
21 : #include "postgres_fe.h"
22 :
23 : #include <ctype.h>
24 : #include <limits.h>
25 :
26 : #include "common/string.h"
27 :
28 : #include "preproc_extern.h"
29 : #include "preproc.h"
30 : }
31 :
32 : %{
33 :
34 : /* LCOV_EXCL_START */
35 :
36 : extern YYSTYPE base_yylval; /* semantic value of the token being returned; filled in by the rule actions */
37 :
38 : static int xcdepth = 0; /* depth of nesting in slash-star comments */
39 : static char *dolqstart = NULL; /* current $foo$ quote start string; freed and reset to NULL when the matching delimiter is seen */
40 :
41 : /*
42 : * literalbuf is used to accumulate literal values when multiple rules
43 : * are needed to parse a single literal. Call startlit to reset buffer
44 : * to empty, addlit to add text. Note that the buffer is permanently
45 : * malloc'd to the largest size needed so far in the current run.
46 : */
47 : static char *literalbuf = NULL; /* expandable buffer */
48 : static int literallen; /* actual current length */
49 : static int literalalloc; /* current allocated buffer size */
50 :
51 : /* Used for detecting global state together with braces_open */
52 : static int parenths_open;
53 :
54 : /* Used to tell parse_include() whether the command was #include or #include_next */
55 : static bool include_next;
56 :
57 : #define startlit() (literalbuf[0] = '\0', literallen = 0) /* empties literalbuf; writes literalbuf[0], so the buffer must already be allocated */
58 : static void addlit(char *ytext, int yleng);
59 : static void addlitchar(unsigned char ychar);
60 : static int process_integer_literal(const char *token, YYSTYPE *lval, int base);
61 : static void parse_include(void);
62 : static bool ecpg_isspace(char ch);
63 : static bool isdefine(void);
64 : static bool isinformixdefine(void);
65 :
66 : char *token_start; /* yytext at the start of a token that spans several rules; NULL at the top of each yylex() call */
67 :
68 : /* vars to keep track of start conditions when scanning literals */
69 : static int state_before_str_start; /* start condition in effect when the literal or comment began; re-entered when it ends */
70 : static int state_before_str_stop; /* in-literal start condition active when the closing quote was seen; consulted by <xqs> */
71 :
72 : struct _yy_buffer /* singly linked list of flex buffers with their line number and file name; presumably the include stack -- usage is outside this view, TODO confirm */
73 : {
74 : YY_BUFFER_STATE buffer;
75 : long lineno;
76 : char *filename;
77 : struct _yy_buffer *next;
78 : } *yy_buffer = NULL;
79 :
80 : static char *old;
81 :
82 : /*
83 : * Vars for handling ifdef/elif/endif constructs. preproc_tos is the current
84 : * nesting depth of such constructs, and stacked_if_value[preproc_tos] is the
85 : * state for the innermost level. (For convenience, stacked_if_value[0] is
86 : * initialized as though we are in the active branch of some outermost IF.)
87 : * The active field is true if the current branch is active (being expanded).
88 : * The saw_active field is true if we have found any successful branch,
89 : * so that all subsequent branches of this level should be skipped.
90 : * The else_branch field is true if we've found an 'else' (so that another
91 : * 'else' or 'elif' at this level is an error.)
92 : * For IFs nested within an inactive branch, all branches always have active
93 : * set to false, but saw_active and else_branch are maintained normally.
94 : * ifcond is valid only while evaluating an if-condition; it's true if we
95 : * are doing ifdef, false if ifndef.
96 : */
97 : #define MAX_NESTED_IF 128
98 : static short preproc_tos;
99 : static bool ifcond;
100 : static struct _if_value
101 : {
102 : bool active;
103 : bool saw_active;
104 : bool else_branch;
105 : } stacked_if_value[MAX_NESTED_IF];
106 :
107 : %}
108 :
109 : %option 8bit
110 : %option never-interactive
111 : %option nodefault
112 : %option noinput
113 : %option noyywrap
114 : %option warn
115 : %option yylineno
116 : %option prefix="base_yy"
117 :
118 : /*
119 : * OK, here is a short description of lex/flex rules behavior.
120 : * The longest pattern which matches an input string is always chosen.
121 : * For equal-length patterns, the first occurring in the rules list is chosen.
122 : * INITIAL is the starting state, to which all non-conditional rules apply.
123 : * Exclusive states change parsing rules while the state is active. When in
124 : * an exclusive state, only those rules defined for that state apply.
125 : *
126 : * We use exclusive states for quoted strings, extended comments,
127 : * and to eliminate parsing troubles for numeric strings.
128 : * Exclusive states:
129 : * <xb> bit string literal
130 : * <xc> extended C-style comments
131 : * <xd> delimited identifiers (double-quoted identifiers)
132 : * <xdc> double-quoted strings in C
133 : * <xh> hexadecimal byte string
134 : * <xn> national character quoted strings
135 : * <xq> standard quoted strings
136 : * <xqs> quote stop (detect continued strings)
137 : * <xe> extended quoted strings (support backslash escape sequences)
138 : * <xqc> single-quoted strings in C
139 : * <xdolq> $foo$ quoted strings
140 : * <xui> quoted identifier with Unicode escapes
141 : * <xus> quoted string with Unicode escapes
142 : * <xcond> condition of an EXEC SQL IFDEF construct
143 : * <xskip> skipping the inactive part of an EXEC SQL IFDEF construct
144 : *
145 : * Note: we intentionally don't mimic the backend's <xeu> state; we have
146 : * no need to distinguish it from <xe> state.
147 : *
148 : * Remember to add an <<EOF>> case whenever you add a new exclusive state!
149 : * The default one is probably not the right thing.
150 : */
151 :
152 : %x xb
153 : %x xc
154 : %x xd
155 : %x xdc
156 : %x xh
157 : %x xn
158 : %x xq
159 : %x xqs
160 : %x xe
161 : %x xqc
162 : %x xdolq
163 : %x xui
164 : %x xus
165 : %x xcond
166 : %x xskip
167 :
168 : /* Additional exclusive states that are specific to ECPG */
169 : %x C SQL incl def def_ident undef
170 :
171 : /*
172 : * In order to make the world safe for Windows and Mac clients as well as
173 : * Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
174 : * sequence will be seen as two successive newlines, but that doesn't cause
175 : * any problems. SQL-style comments, which start with -- and extend to the
176 : * next newline, are treated as equivalent to a single whitespace character.
177 : *
178 : * NOTE a fine point: if there is no newline following --, we will absorb
179 : * everything to the end of the input as a comment. This is correct. Older
180 : * versions of Postgres failed to recognize -- as a comment if the input
181 : * did not end with a newline.
182 : *
183 : * XXX perhaps \f (formfeed) should be treated as a newline as well?
184 : *
185 : * XXX if you change the set of whitespace characters, fix ecpg_isspace()
186 : * to agree.
187 : */
188 :
189 : space [ \t\n\r\f]
190 : horiz_space [ \t\f]
191 : newline [\n\r]
192 : non_newline [^\n\r]
193 :
194 : comment ("--"{non_newline}*)
195 :
196 : whitespace ({space}+|{comment})
197 :
198 : /*
199 : * SQL requires at least one newline in the whitespace separating
200 : * string literals that are to be concatenated. Silly, but who are we
201 : * to argue? Note that {whitespace_with_newline} should not have * after
202 : * it, whereas {whitespace} should generally have a * after it...
203 : */
204 :
205 : horiz_whitespace ({horiz_space}|{comment})
206 : whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
207 :
208 : quote '
209 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
210 : quotecontinue {whitespace_with_newline}{quote}
211 :
212 : /*
213 : * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
214 : * {quotecontinue}. It might seem that this could just be {whitespace}*,
215 : * but if there's a dash after {whitespace_with_newline}, it must be consumed
216 : * to see if there's another dash --- which would start a {comment} and thus
217 : * allow continuation of the {quotecontinue} token.
218 : */
219 : quotecontinuefail {whitespace}*"-"?
220 :
221 : /* Bit string
222 : */
223 : xbstart [bB]{quote}
224 : xbinside [^']*
225 :
226 : /* Hexadecimal byte string */
227 : xhstart [xX]{quote}
228 : xhinside [^']*
229 :
230 : /* National character */
231 : xnstart [nN]{quote}
232 :
233 : /* Quoted string that allows backslash escapes */
234 : xestart [eE]{quote}
235 : xeinside [^\\']+
236 : xeescape [\\][^0-7]
237 : xeoctesc [\\][0-7]{1,3}
238 : xehexesc [\\]x[0-9A-Fa-f]{1,2}
239 : xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
240 :
241 : /* Extended quote
242 : * xqdouble implements embedded quote, ''''
243 : */
244 : xqstart {quote}
245 : xqdouble {quote}{quote}
246 : xqcquote [\\]{quote}
247 : xqinside [^']+
248 :
249 : /* $foo$ style quotes ("dollar quoting")
250 : * The quoted string starts with $foo$ where "foo" is an optional string
251 : * in the form of an identifier, except that it may not contain "$",
252 : * and extends to the first occurrence of an identical string.
253 : * There is *no* processing of the quoted text.
254 : *
255 : * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
256 : * fails to match its trailing "$".
257 : */
258 : dolq_start [A-Za-z\200-\377_]
259 : dolq_cont [A-Za-z\200-\377_0-9]
260 : dolqdelim \$({dolq_start}{dolq_cont}*)?\$
261 : dolqfailed \${dolq_start}{dolq_cont}*
262 : dolqinside [^$]+
263 :
264 : /* Double quote
265 : * Allows embedded spaces and other special characters in identifiers.
266 : */
267 : dquote \"
268 : xdstart {dquote}
269 : xdstop {dquote}
270 : xddouble {dquote}{dquote}
271 : xdinside [^"]+
272 :
273 : /* Quoted identifier with Unicode escapes */
274 : xuistart [uU]&{dquote}
275 :
276 : /* Quoted string with Unicode escapes */
277 : xusstart [uU]&{quote}
278 :
279 : /* special stuff for C strings */
280 : xdcqq \\\\
281 : xdcqdq \\\"
282 : xdcother [^"]
283 : xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
284 :
285 :
286 : /* C-style comments
287 : *
288 : * The "extended comment" syntax closely resembles allowable operator syntax.
289 : * The tricky part here is to get lex to recognize a string starting with
290 : * slash-star as a comment, when interpreting it as an operator would produce
291 : * a longer match --- remember lex will prefer a longer match! Also, if we
292 : * have something like plus-slash-star, lex will think this is a 3-character
293 : * operator whereas we want to see it as a + operator and a comment start.
294 : * The solution is two-fold:
295 : * 1. append {op_chars}* to xcstart so that it matches as much text as
296 : * {operator} would. Then the tie-breaker (first matching rule of same
297 : * length) ensures xcstart wins. We put back the extra stuff with yyless()
298 : * in case it contains a star-slash that should terminate the comment.
299 : * 2. In the operator rule, check for slash-star within the operator, and
300 : * if found throw it back with yyless(). This handles the plus-slash-star
301 : * problem.
302 : * Dash-dash comments have similar interactions with the operator rule.
303 : */
304 : xcstart \/\*{op_chars}*
305 : xcstop \*+\/
306 : xcinside [^*/]+
307 :
308 : ident_start [A-Za-z\200-\377_]
309 : ident_cont [A-Za-z\200-\377_0-9\$]
310 :
311 : identifier {ident_start}{ident_cont}*
312 :
313 : array ({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
314 :
315 : /* Assorted special-case operators and operator-like tokens */
316 : typecast "::"
317 : dot_dot \.\.
318 : colon_equals ":="
319 :
320 : /*
321 : * These operator-like tokens (unlike the above ones) also match the {operator}
322 : * rule, which means that they might be overridden by a longer match if they
323 : * are followed by a comment start or a + or - character. Accordingly, if you
324 : * add to this list, you must also add corresponding code to the {operator}
325 : * block to return the correct token in such cases. (This is not needed in
326 : * psqlscan.l since the token value is ignored there.)
327 : */
328 : equals_greater "=>"
329 : less_equals "<="
330 : greater_equals ">="
331 : less_greater "<>"
332 : not_equals "!="
333 :
334 : /*
335 : * "self" is the set of chars that should be returned as single-character
336 : * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
337 : * which can be one or more characters long (but if a single-char token
338 : * appears in the "self" set, it is not to be returned as an Op). Note
339 : * that the sets overlap, but each has some chars that are not in the other.
340 : *
341 : * If you change either set, adjust the character lists appearing in the
342 : * rule for "operator"!
343 : */
344 : self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
345 : op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
346 : operator {op_chars}+
347 :
348 : /*
349 : * Numbers
350 : *
351 : * Unary minus is not part of a number here. Instead we pass it separately to
352 : * the parser, and there it gets coerced via doNegate().
353 : *
354 : * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
355 : *
356 : * {realfail} is added to prevent the need for scanner
357 : * backup when the {real} rule fails to match completely.
358 : */
359 : decdigit [0-9]
360 : hexdigit [0-9A-Fa-f]
361 : octdigit [0-7]
362 : bindigit [0-1]
363 :
364 : decinteger {decdigit}(_?{decdigit})*
365 : hexinteger 0[xX](_?{hexdigit})+
366 : octinteger 0[oO](_?{octdigit})+
367 : bininteger 0[bB](_?{bindigit})+
368 :
369 : hexfail 0[xX]_?
370 : octfail 0[oO]_?
371 : binfail 0[bB]_?
372 :
373 : numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
374 : numericfail {decdigit}+\.\.
375 :
376 : real ({decinteger}|{numeric})[Ee][-+]?{decinteger}
377 : realfail ({decinteger}|{numeric})[Ee][-+]
378 :
379 : decinteger_junk {decinteger}{ident_start}
380 : hexinteger_junk {hexinteger}{ident_start}
381 : octinteger_junk {octinteger}{ident_start}
382 : bininteger_junk {bininteger}{ident_start}
383 : numeric_junk {numeric}{ident_start}
384 : real_junk {real}{ident_start}
385 :
386 : param \${decinteger}
387 : param_junk \${decinteger}{ident_start}
388 :
389 : /* special characters for other dbms */
390 : /* we have to react differently in compat mode */
391 : informix_special [\$]
392 :
393 : other .
394 :
395 : /*
396 : * Dollar quoted strings are totally opaque, and no escaping is done on them.
397 : * Other quoted strings must allow some special characters such as single-quote
398 : * and newline.
399 : * Embedded single-quotes are implemented both in the SQL standard
400 : * style of two adjacent single quotes "''" and in the Postgres/Java style
401 : * of escaped-quote "\'".
402 : * Other embedded escaped characters are matched explicitly; this scanner
403 : * copies each escape sequence into the literal unchanged, backslash included.
404 : * Note that xcstart must appear before operator, as explained above!
405 : * Also whitespace (comment) must appear before operator.
406 : */
407 :
408 : /* some stuff needed for ecpg */
409 : exec [eE][xX][eE][cC]
410 : sql [sS][qQ][lL]
411 : define [dD][eE][fF][iI][nN][eE]
412 : include [iI][nN][cC][lL][uU][dD][eE]
413 : include_next [iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
414 : import [iI][mM][pP][oO][rR][tT]
415 : undef [uU][nN][dD][eE][fF]
416 :
417 : ccomment "//".*\n
418 :
419 : if [iI][fF]
420 : ifdef [iI][fF][dD][eE][fF]
421 : ifndef [iI][fF][nN][dD][eE][fF]
422 : else [eE][lL][sS][eE]
423 : elif [eE][lL][iI][fF]
424 : endif [eE][nN][dD][iI][fF]
425 :
426 : struct [sS][tT][rR][uU][cC][tT]
427 :
428 : exec_sql {exec}{space}*{sql}{space}*
429 : ipdigit ({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
430 : ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
431 :
432 : /* we might want to parse all cpp include files */
433 : cppinclude {space}*#{include}{space}*
434 : cppinclude_next {space}*#{include_next}{space}*
435 :
436 : /* take care of cpp lines, they may also be continued */
437 : /* first a general line for all commands not starting with "i" */
438 : /* and then the other commands starting with "i", we have to add these
439 : * separately because the cppline production would match on "include" too
440 : */
441 : cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+\/)|.|\\{space}*{newline})*{newline}
442 :
443 : %%
444 :
445 : %{
446 : /* code to execute during start of each call of yylex(): no multi-rule token is in progress yet, so clear its recorded start */
447 : token_start = NULL;
448 GIC 34712 : %}
449 :
450 : <SQL>{
451 : {whitespace} {
452 : /* ignore: in SQL mode, runs of whitespace and "--" comments return no token and are not echoed */
453 : }
454 : } /* <SQL> */
455 5074 :
456 : <C,SQL>{
457 : {xcstart} { /* slash-star seen in C or SQL mode: switch to <xc> to consume the comment */
458 483 : token_start = yytext;
459 483 : state_before_str_start = YYSTATE; /* remember C vs. SQL; <xc> uses it to choose its output and where to return */
460 CBC 483 : xcdepth = 0; /* outermost comment level */
461 GIC 483 : BEGIN(xc);
462 483 : /* Put back any characters past slash-star; see above */
463 : yyless(2);
464 635 : fputs("/*", yyout); /* copy the comment opener to the output */
465 483 : }
466 : } /* <C,SQL> */
467 CBC 483 :
468 : <xc>{
469 : {xcstart} { /* a further slash-star while already inside a comment */
470 LBC 0 : if (state_before_str_start == SQL)
471 0 : {
472 ECB : xcdepth++; /* comment began in SQL mode: track the nesting level */
473 LBC 0 : /* Put back any characters past slash-star; see above */
474 ECB : yyless(2);
475 UIC 0 : fputs("/_*", yyout); /* nested opener is written in altered form; presumably so the emitted C comment contains no nested opener -- TODO confirm */
476 LBC 0 : }
477 ECB : else if (state_before_str_start == C)
478 UIC 0 : {
479 ECB : ECHO; /* comment began in C mode: copied verbatim, depth is not tracked */
480 UIC 0 : }
481 : }
482 EUB :
483 UBC 0 : {xcstop} { /* star-slash: ends either a nested level or the whole comment */
484 GIC 483 : if (state_before_str_start == SQL)
485 GBC 483 : {
486 : if (xcdepth <= 0)
487 UBC 0 : {
488 EUB : ECHO; /* outermost level: emit the real terminator and resume SQL scanning */
489 UIC 0 : BEGIN(SQL);
490 UBC 0 : token_start = NULL;
491 UIC 0 : }
492 EUB : else
493 : {
494 : xcdepth--;
495 UBC 0 : fputs("*_/", yyout); /* nested terminator is written in altered form, mirroring the "/_*" opener */
496 LBC 0 : }
497 ECB : }
498 : else if (state_before_str_start == C)
499 GBC 483 : {
500 : ECHO; /* in C mode the first terminator always ends the comment */
501 483 : BEGIN(C);
502 483 : token_start = NULL;
503 483 : }
504 : }
505 :
506 GIC 483 : {xcinside} { /* comment text containing neither '*' nor '/' */
507 GBC 575 : ECHO;
508 575 : }
509 :
510 GIC 575 : {op_chars} { /* a single operator character (this covers a lone '/' or '*') */
511 CBC 92 : ECHO;
512 GIC 92 : }
513 ECB :
514 CBC 92 : \*+ { /* a run of stars that is not followed by '/' */
515 1 : ECHO;
516 GIC 1 : }
517 :
518 CBC 1 : <<EOF>> { /* input ended while still inside the comment */
519 LBC 0 : mmfatal(PARSE_ERROR, "unterminated /* comment");
520 0 : }
521 : } /* <xc> */
522 ECB :
523 : <SQL>{
524 : {xbstart} { /* b' or B' : begin collecting a bit string literal */
525 GIC 1 : token_start = yytext;
526 CBC 1 : state_before_str_start = YYSTATE;
527 1 : BEGIN(xb);
528 1 : startlit(); /* the prefix and opening quote are not stored; they are re-added when the token is built in <xqs> */
529 GIC 1 : }
530 : } /* <SQL> */
531 GBC 1 :
532 EUB : <xh>{xhinside} |
533 GIC 2 : <xb>{xbinside} {
534 : addlit(yytext, yyleng); /* collect everything up to the next quote; the digits are validated when the literal ends */
535 2 : }
536 : <xb><<EOF>> { mmfatal(PARSE_ERROR, "unterminated bit string literal"); }
537 CBC 2 :
538 LBC 0 : <SQL>{xhstart} { /* x' or X' : begin collecting a hexadecimal string literal */
539 CBC 1 : token_start = yytext;
540 1 : state_before_str_start = YYSTATE;
541 1 : BEGIN(xh);
542 GIC 1 : startlit();
543 CBC 1 : }
544 : <xh><<EOF>> { mmfatal(PARSE_ERROR, "unterminated hexadecimal string literal"); }
545 1 :
546 UIC 0 : <C>{xqstart} { /* single quote in C mode: begin a C single-quoted string (<xqc>) */
547 CBC 15 : token_start = yytext;
548 GIC 15 : state_before_str_start = YYSTATE;
549 CBC 15 : BEGIN(xqc);
550 GBC 15 : startlit();
551 CBC 15 : }
552 ECB :
553 CBC 15 : <SQL>{
554 ECB : {xnstart} {
555 CBC 1 : /* National character.
556 : * Transfer it as-is to the backend.
557 ECB : */
558 EUB : token_start = yytext;
559 CBC 1 : state_before_str_start = YYSTATE;
560 1 : BEGIN(xn);
561 1 : startlit();
562 1 : }
563 ECB :
564 GIC 1 : {xqstart} { /* plain single quote in SQL mode: standard quoted string (<xq>) */
565 CBC 145 : token_start = yytext;
566 GIC 145 : state_before_str_start = YYSTATE;
567 CBC 145 : BEGIN(xq);
568 GIC 145 : startlit();
569 145 : }
570 : {xestart} { /* e' or E' : string with backslash escapes (<xe>) */
571 CBC 145 : token_start = yytext;
572 3 : state_before_str_start = YYSTATE;
573 3 : BEGIN(xe);
574 3 : startlit();
575 GIC 3 : }
576 ECB : {xusstart} { /* u&' or U&' : string with Unicode escapes (<xus>) */
577 CBC 3 : token_start = yytext;
578 2 : state_before_str_start = YYSTATE;
579 2 : BEGIN(xus);
580 2 : startlit();
581 2 : }
582 : } /* <SQL> */
583 2 :
584 ECB : <xb,xh,xq,xqc,xe,xn,xus>{quote} {
585 CBC 168 : /*
586 ECB : * When we are scanning a quoted string and see an end
587 : * quote, we must look ahead for a possible continuation.
588 : * If we don't see one, we know the end quote was in fact
589 : * the end of the string. To reduce the lexer table size,
590 : * we use a single "xqs" state to do the lookahead for all
591 : * types of strings.
592 : */
593 : state_before_str_stop = YYSTATE; /* remember which kind of literal is being closed */
594 GIC 168 : BEGIN(xqs);
595 CBC 168 : }
596 : <xqs>{quotecontinue} {
597 168 : /*
598 UIC 0 : * Found a quote continuation, so return to the in-quote
599 : * state and continue scanning the literal. Nothing is
600 : * added to the literal's contents.
601 : */
602 : BEGIN(state_before_str_stop);
603 0 : }
604 : <xqs>{quotecontinuefail} |
605 0 : <xqs>{other} |
606 CBC 168 : <xqs><<EOF>> {
607 ECB : /*
608 : * Failed to see a quote continuation. Throw back
609 : * everything after the end quote, and handle the string
610 EUB : * according to the state we were in previously.
611 : */
612 : yyless(0);
613 GIC 354 : BEGIN(state_before_str_start); /* the literal is complete: resume the state it was started from */
614 168 :
615 EUB : switch (state_before_str_stop) /* rebuild the token text with the prefix that belongs to this kind of literal */
616 GIC 168 : {
617 EUB : case xb:
618 CBC 1 : if (literalbuf[strspn(literalbuf, "01")] != '\0') /* true when any character other than 0 or 1 is present */
619 GIC 1 : mmerror(PARSE_ERROR, ET_ERROR, "invalid bit string literal");
620 UIC 0 : base_yylval.str = psprintf("b'%s'", literalbuf);
621 GIC 1 : return BCONST;
622 1 : case xh:
623 1 : if (literalbuf[strspn(literalbuf, "0123456789abcdefABCDEF")] != '\0') /* true when any non-hex-digit character is present */
624 1 : mmerror(PARSE_ERROR, ET_ERROR, "invalid hexadecimal string literal");
625 LBC 0 : base_yylval.str = psprintf("x'%s'", literalbuf);
626 CBC 1 : return XCONST;
627 GIC 1 : case xq:
628 CBC 160 : /* fallthrough */
629 : case xqc:
630 ECB : base_yylval.str = psprintf("'%s'", literalbuf);
631 CBC 160 : return SCONST;
632 GBC 160 : case xe:
633 CBC 3 : base_yylval.str = psprintf("E'%s'", literalbuf);
634 3 : return SCONST;
635 3 : case xn:
636 1 : base_yylval.str = psprintf("N'%s'", literalbuf);
637 GBC 1 : return SCONST;
638 CBC 1 : case xus:
639 2 : base_yylval.str = psprintf("U&'%s'", literalbuf);
640 2 : return USCONST;
641 GIC 2 : default: /* no other start condition enters <xqs> in the rules shown here */
642 UIC 0 : mmfatal(PARSE_ERROR, "unhandled previous state in xqs\n");
643 LBC 0 : }
644 ECB : }
645 :
646 : <xq,xe,xn,xus>{xqdouble} { addlit(yytext, yyleng); /* a doubled quote is stored as matched, i.e. still doubled */ }
647 CBC 4 : <xqc>{xqcquote} { addlit(yytext, yyleng); /* backslash-quote in a C string is stored with its backslash */ }
648 4 : <xq,xqc,xn,xus>{xqinside} { addlit(yytext, yyleng); }
649 LBC 0 : <xe>{xeinside} {
650 CBC 165 : addlit(yytext, yyleng);
651 7 : }
652 ECB : <xe>{xeunicode} {
653 CBC 7 : addlit(yytext, yyleng); /* escape sequences are stored verbatim, not decoded; same for the three rules below */
654 UBC 0 : }
655 EUB : <xe>{xeescape} {
656 UIC 0 : addlit(yytext, yyleng);
657 GIC 5 : }
658 : <xe>{xeoctesc} {
659 CBC 5 : addlit(yytext, yyleng);
660 LBC 0 : }
661 EUB : <xe>{xehexesc} {
662 LBC 0 : addlit(yytext, yyleng);
663 0 : }
664 : <xe>. {
665 0 : /* This is only needed for \ just before EOF */
666 UBC 0 : addlitchar(yytext[0]);
667 UIC 0 : }
668 EUB : <xq,xqc,xe,xn,xus><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
669 LBC 0 :
670 UIC 0 : <SQL>{
671 ECB : {dolqdelim} { /* $tag$ (tag may be empty): begin a dollar-quoted string */
672 GBC 2 : token_start = yytext;
673 GIC 2 : if (dolqstart)
674 GBC 2 : free(dolqstart);
675 UBC 0 : dolqstart = mm_strdup(yytext); /* keep the delimiter so the closing one can be compared against it */
676 GIC 2 : BEGIN(xdolq);
677 GBC 2 : startlit();
678 2 : addlit(yytext, yyleng); /* unlike the quoted-string rules, the opening delimiter is part of the stored literal */
679 2 : }
680 : {dolqfailed} {
681 2 : /* throw back all but the initial "$" */
682 UBC 0 : yyless(1);
683 UIC 0 : /* and treat it as {other} */
684 ECB : return yytext[0];
685 LBC 0 : }
686 ECB : } /* <SQL> */
687 EUB :
688 ECB : <xdolq>{dolqdelim} {
689 CBC 3 : if (strcmp(yytext, dolqstart) == 0) /* exact match with the opening delimiter ends the string */
690 3 : {
691 ECB : addlit(yytext, yyleng);
692 GIC 2 : free(dolqstart);
693 CBC 2 : dolqstart = NULL;
694 GBC 2 : BEGIN(SQL);
695 2 : base_yylval.str = mm_strdup(literalbuf); /* token text includes both delimiters */
696 GIC 2 : return SCONST;
697 GBC 2 : }
698 : else
699 : {
700 : /*
701 ECB : * When we fail to match $...$ to dolqstart, transfer
702 : * the $... part to the output, but put back the final
703 : * $ for rescanning. Consider $delim$...$junk$delim$
704 : */
705 : addlit(yytext, yyleng - 1);
706 CBC 1 : yyless(yyleng - 1);
707 2 : }
708 ECB : }
709 : <xdolq>{dolqinside} {
710 GIC 1 : addlit(yytext, yyleng);
711 2 : }
712 : <xdolq>{dolqfailed} {
713 2 : addlit(yytext, yyleng); /* "$tag" without a closing "$" is ordinary content here */
714 UIC 0 : }
715 : <xdolq>. {
716 0 : /* single quote or dollar sign */
717 0 : addlitchar(yytext[0]);
718 LBC 0 : }
719 ECB : <xdolq><<EOF>> { mmfatal(PARSE_ERROR, "unterminated dollar-quoted string"); }
720 UIC 0 :
721 0 : <SQL>{
722 ECB : {xdstart} { /* double quote in SQL mode: begin a delimited identifier (<xd>) */
723 CBC 63 : state_before_str_start = YYSTATE;
724 GIC 63 : BEGIN(xd);
725 CBC 63 : startlit();
726 GBC 63 : }
727 : {xuistart} { /* u&" or U&" : begin a delimited identifier with Unicode escapes (<xui>) */
728 63 : state_before_str_start = YYSTATE;
729 1 : BEGIN(xui);
730 1 : startlit();
731 GIC 1 : }
732 EUB : } /* <SQL> */
733 GBC 1 :
734 : <xd>{xdstop} {
735 CBC 63 : BEGIN(state_before_str_start);
736 63 : if (literallen == 0)
737 63 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
738 LBC 0 : /*
739 : * The server will truncate the identifier here. We do
740 ECB : * not, as (1) it does not change the result; (2) we don't
741 : * know what NAMEDATALEN the server might use; (3) this
742 : * code path is also taken for literal query strings in
743 : * PREPARE and EXECUTE IMMEDIATE, which can certainly be
744 : * longer than NAMEDATALEN.
745 : */
746 : base_yylval.str = mm_strdup(literalbuf); /* returned without the surrounding double quotes */
747 CBC 63 : return CSTRING;
748 63 : }
749 ECB : <xdc>{xdstop} { /* end of a C double-quoted string; unlike <xd>, an empty string is not an error */
750 EUB : BEGIN(state_before_str_start);
751 GIC 1080 : base_yylval.str = mm_strdup(literalbuf);
752 1080 : return CSTRING;
753 1080 : }
754 : <xui>{dquote} {
755 : BEGIN(state_before_str_start);
756 1 : if (literallen == 0)
757 1 : mmerror(PARSE_ERROR, ET_ERROR, "zero-length delimited identifier");
758 UIC 0 : /* The backend will truncate the identifier here. We do not as it does not change the result. */
759 ECB : base_yylval.str = psprintf("U&\"%s\"", literalbuf); /* prefix and quotes are restored around the collected text */
760 CBC 1 : return UIDENT;
761 GIC 1 : }
762 : <xd,xui>{xddouble} {
763 ECB : addlit(yytext, yyleng); /* a doubled double-quote is stored as matched, i.e. still doubled */
764 CBC 2 : }
765 ECB : <xd,xui>{xdinside} {
766 GIC 2 : addlit(yytext, yyleng);
767 66 : }
768 ECB : <xd,xui><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted identifier"); }
769 CBC 66 : <C>{xdstart} { /* double quote in C mode: begin a C string literal (<xdc>) */
770 UBC 0 : state_before_str_start = YYSTATE;
771 GIC 1080 : BEGIN(xdc);
772 CBC 1080 : startlit();
773 1080 : }
774 : <xdc>{xdcinside} {
775 GIC 1080 : addlit(yytext, yyleng); /* one unit at a time: an escaped backslash, an escaped double quote, or any other non-quote character */
776 CBC 18978 : }
777 : <xdc><<EOF>> { mmfatal(PARSE_ERROR, "unterminated quoted string"); }
778 18978 :
779 LBC 0 : <SQL>{
780 : {typecast} {
781 CBC 6 : return TYPECAST;
782 GBC 6 : }
783 ECB :
784 : {dot_dot} {
785 LBC 0 : return DOT_DOT;
786 UIC 0 : }
787 ECB :
788 : {colon_equals} {
789 UIC 0 : return COLON_EQUALS;
790 LBC 0 : }
791 EUB :
792 : {equals_greater} {
793 LBC 0 : return EQUALS_GREATER;
794 0 : }
795 :
796 : {less_equals} {
797 GBC 1 : return LESS_EQUALS;
798 1 : }
799 :
800 : {greater_equals} {
801 UBC 0 : return GREATER_EQUALS;
802 0 : }
803 :
804 : {less_greater} {
805 0 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
806 EUB : return NOT_EQUALS;
807 UIC 0 : }
808 :
809 ECB : {not_equals} {
810 LBC 0 : /* We accept both "<>" and "!=" as meaning NOT_EQUALS */
811 : return NOT_EQUALS;
812 UIC 0 : }
813 EUB :
814 : {informix_special} {
815 UIC 0 : /* are we simulating Informix? */
816 : if (INFORMIX_MODE)
817 UBC 0 : {
818 : unput(':'); /* push a ':' back into the input and return no token, so scanning resumes at that ':' in place of the '$' */
819 0 : }
820 : else
821 : return yytext[0]; /* otherwise '$' is returned as a plain character token */
822 0 : }
823 :
824 0 : {self} {
825 GIC 2939 : /*
826 : * We may find a ';' inside a structure
827 EUB : * definition in a TYPE or VAR statement.
828 : * This is not an EOL marker.
829 : */
830 : if (yytext[0] == ';' && struct_level == 0)
831 GBC 2939 : BEGIN(C); /* a ';' outside any struct switches the scanner back to C mode */
832 GIC 1300 : return yytext[0];
833 2939 : }
834 EUB :
835 : {operator} { /* trims the match to a legal operator, then returns a self character, a two-character token, or Op */
836 GBC 19 : /*
837 ECB : * Check for embedded slash-star or dash-dash; those
838 : * are comment starts, so operator must stop there.
839 : * Note that slash-star or dash-dash at the first
840 : * character will match a prior rule, not this one.
841 : */
842 : int nchars = yyleng;
843 CBC 19 : char *slashstar = strstr(yytext, "/*");
844 19 : char *dashdash = strstr(yytext, "--");
845 19 :
846 : if (slashstar && dashdash)
847 GIC 19 : {
848 ECB : /* if both appear, take the first one */
849 : if (slashstar > dashdash)
850 UIC 0 : slashstar = dashdash;
851 0 : }
852 : else if (!slashstar)
853 GIC 19 : slashstar = dashdash;
854 19 : if (slashstar) /* from here on slashstar means "earliest comment start of either kind", or NULL */
855 CBC 19 : nchars = slashstar - yytext;
856 LBC 0 :
857 ECB : /*
858 : * For SQL compatibility, '+' and '-' cannot be the
859 : * last char of a multi-char operator unless the operator
860 : * contains chars that are not in SQL operators.
861 : * The idea is to lex '=-' as two operators, but not
862 EUB : * to forbid operator names like '?-' that could not be
863 : * sequences of SQL operators.
864 : */
865 ECB : if (nchars > 1 &&
866 CBC 19 : (yytext[nchars - 1] == '+' ||
867 14 : yytext[nchars - 1] == '-'))
868 GBC 14 : {
869 : int ic;
870 :
871 : for (ic = nchars - 2; ic >= 0; ic--) /* scan backwards for a character that is not a SQL-standard operator char */
872 GIC 6 : {
873 : char c = yytext[ic];
874 3 : if (c == '~' || c == '!' || c == '@' ||
875 3 : c == '#' || c == '^' || c == '&' ||
876 3 : c == '|' || c == '`' || c == '?' ||
877 3 : c == '%')
878 ECB : break;
879 : }
880 : if (ic < 0)
881 GIC 3 : {
882 : /*
883 : * didn't find a qualifying character, so remove
884 ECB : * all trailing [+-]
885 : */
886 : do {
887 : nchars--;
888 CBC 3 : } while (nchars > 1 &&
889 3 : (yytext[nchars - 1] == '+' ||
890 UIC 0 : yytext[nchars - 1] == '-'));
891 0 : }
892 : }
893 ECB :
894 : if (nchars < yyleng)
895 GIC 19 : {
896 : /* Strip the unwanted chars from the token */
897 : yyless(nchars);
898 6 : /*
899 : * If what we have left is only one char, and it's
900 ECB : * one of the characters matching "self", then
901 : * return it as a character token the same way
902 EUB : * that the "self" rule would have.
903 : */
904 : if (nchars == 1 &&
905 GIC 3 : strchr(",()[].;:+-*/%^<>=", yytext[0]))
906 3 : return yytext[0]; /* note: unlike the {self} rule, this path does not switch to C mode */
907 CBC 3 : /*
908 : * Likewise, if what we have left is two chars, and
909 : * those match the tokens ">=", "<=", "=>", "<>" or
910 ECB : * "!=", then we must return the appropriate token
911 : * rather than the generic Op.
912 : */
913 : if (nchars == 2)
914 UIC 0 : {
915 : if (yytext[0] == '=' && yytext[1] == '>')
916 0 : return EQUALS_GREATER;
917 LBC 0 : if (yytext[0] == '>' && yytext[1] == '=')
918 0 : return GREATER_EQUALS;
919 0 : if (yytext[0] == '<' && yytext[1] == '=')
920 UIC 0 : return LESS_EQUALS;
921 0 : if (yytext[0] == '<' && yytext[1] == '>')
922 0 : return NOT_EQUALS;
923 0 : if (yytext[0] == '!' && yytext[1] == '=')
924 0 : return NOT_EQUALS;
925 0 : }
926 EUB : }
927 :
928 : base_yylval.str = mm_strdup(yytext);
929 GBC 16 : return Op;
930 16 : }
931 EUB :
932 : {param} {
933 GBC 11 : base_yylval.ival = atol(yytext+1); /* skip the leading '$'. NOTE(review): atol() reports no error for out-of-range values */
934 11 : return PARAM;
935 11 : }
936 EUB : {param_junk} {
937 : mmfatal(PARSE_ERROR, "trailing junk after parameter");
938 UIC 0 : }
939 :
940 : {ip} { /* four dot-separated groups of one to three digits, returned as text */
941 CBC 1 : base_yylval.str = mm_strdup(yytext);
942 1 : return IP;
943 GIC 1 : }
944 : } /* <SQL> */
945 ECB :
946 : <C,SQL>{
947 : {decinteger} {
948 GNC 1178 : return process_integer_literal(yytext, &base_yylval, 10); /* last argument is the numeric base */
949 GIC 1178 : }
950 : {hexinteger} {
951 : return process_integer_literal(yytext, &base_yylval, 16);
952 GNC 3 : }
953 : {numeric} {
954 : base_yylval.str = mm_strdup(yytext); /* non-integer numbers are passed on as text */
955 GIC 18 : return FCONST;
956 CBC 18 : }
957 : {numericfail} {
958 ECB : /* throw back the .., and treat as integer */
959 UIC 0 : yyless(yyleng - 2);
960 UNC 0 : return process_integer_literal(yytext, &base_yylval, 10);
961 UIC 0 : }
962 : {real} {
963 ECB : base_yylval.str = mm_strdup(yytext);
964 LBC 0 : return FCONST;
965 UIC 0 : }
966 : {realfail} {
967 ECB : /*
968 UIC 0 : * throw back the [Ee][+-], and figure out whether what
969 : * remains is a {decinteger} or a {numeric}.
970 ECB : */
971 : yyless(yyleng - 2);
972 UNC 0 : return process_integer_literal(yytext, &base_yylval, 10); /* NOTE(review): the remainder may be a {numeric}; presumably process_integer_literal() copes with that -- it is defined outside this view, confirm */
973 UIC 0 : }
974 EUB : } /* <C,SQL> */
975 :
976 : <SQL>{
977 : {octinteger} {
978 UNC 0 : return process_integer_literal(yytext, &base_yylval, 8);
979 0 : }
980 : {bininteger} {
981 : return process_integer_literal(yytext, &base_yylval, 2);
982 0 : }
983 :
984 : /*
985 : * Note that some trailing junk is valid in C (such as 100LL), so we
986 EUB : * restrict these junk rules to SQL mode.
987 : */
988 : {decinteger_junk} {
989 UIC 0 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
990 UBC 0 : }
991 : {hexinteger_junk} {
992 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
993 UNC 0 : }
994 : {octinteger_junk} {
995 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
996 0 : }
997 : {bininteger_junk} {
998 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
999 0 : }
1000 : {numeric_junk} {
1001 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1002 UIC 0 : }
1003 EUB : {real_junk} {
1004 : mmfatal(PARSE_ERROR, "trailing junk after numeric literal");
1005 UIC 0 : }
1006 :
1007 : :{identifier}((("->"|\.){identifier})|(\[{array}\]))* {
1008 GIC 613 : base_yylval.str = mm_strdup(yytext+1); /* host variable reference: the leading ':' is dropped, member and subscript parts are kept */
1009 GBC 613 : return CVARIABLE;
1010 613 : }
1011 :
1012 : {identifier} {
1013 5438 : if (!isdefine()) /* when isdefine() returns true this action returns no token; isdefine() is defined outside this view */
1014 GIC 5438 : {
1015 : int kwvalue;
1016 :
1017 : /*
1018 : * User-defined typedefs override SQL keywords, but
1019 : * not C keywords. Currently, a typedef name is just
1020 : * reported as IDENT, but someday we might need to
1021 : * return a distinct token type.
1022 : */
1023 : if (get_typedef(yytext, true) == NULL)
1024 GNC 5364 : {
1025 : /* Is it an SQL/ECPG keyword? */
1026 : kwvalue = ScanECPGKeywordLookup(yytext);
1027 5362 : if (kwvalue >= 0) /* a non-negative result is used directly as the token code */
1028 5362 : return kwvalue;
1029 3792 : }
1030 EUB :
1031 : /* Is it a C keyword? */
1032 : kwvalue = ScanCKeywordLookup(yytext);
1033 GBC 1572 : if (kwvalue >= 0)
1034 GIC 1572 : return kwvalue;
1035 4 :
1036 EUB : /*
1037 : * None of the above. Return it as an identifier.
1038 : *
1039 : * The backend will attempt to truncate and case-fold
1040 : * the identifier, but I see no good reason for ecpg
1041 : * to do so; that's just another way that ecpg could get
1042 : * out of step with the backend.
1043 : */
1044 : base_yylval.str = mm_strdup(yytext);
1045 GBC 1568 : return IDENT;
1046 GIC 1568 : }
1047 : }
1048 ECB :
1049 CBC 74 : {other} {
1050 14 : return yytext[0]; /* any character no earlier rule matched is returned as itself */
1051 GIC 14 : }
1052 : } /* <SQL> */
1053 ECB :
1054 : /*
1055 : * Begin ECPG-specific rules
1056 : */
1057 :
<C>{exec_sql}		{
						/* embedded SQL begins: switch the scanner to SQL mode */
						BEGIN(SQL);
						return SQL_START;
					}
<C>{informix_special}	{
						/* only starts SQL when we are simulating Informix */
						if (!INFORMIX_MODE)
							return S_ANYTHING;

						BEGIN(SQL);
						return SQL_START;
					}
<C>{ccomment}		{
						/* C comments are copied through unchanged */
						ECHO;
					}
<C>{cppinclude}		{
						/* without system_includes the directive is passed on verbatim */
						if (!system_includes)
						{
							base_yylval.str = mm_strdup(yytext);
							return CPP_LINE;
						}

						/* otherwise let the <incl> rules read the file name */
						include_next = false;
						BEGIN(incl);
					}
<C>{cppinclude_next}	{
						/* without system_includes the directive is passed on verbatim */
						if (!system_includes)
						{
							base_yylval.str = mm_strdup(yytext);
							return CPP_LINE;
						}

						/* tell parse_include() to disregard the first hit */
						include_next = true;
						BEGIN(incl);
					}
<C,SQL>{cppline}	{
						/* any other preprocessor line is handed over as text */
						char	   *line = mm_strdup(yytext);

						base_yylval.str = line;
						return CPP_LINE;
					}
<C>{identifier}		{
						bool		substituted;

						/*
						 * Function-name detection: outside all braces and
						 * parentheses, remember the most recent identifier, i.e.
						 * the last one seen before the first '(' and '{'.
						 */
						if (braces_open == 0 && parenths_open == 0)
						{
							free(current_function);
							current_function = mm_strdup(yytext);
						}

						/*
						 * Informix applies SQL defines only in SQL space, but a
						 * few built-in names must be handled for compatibility;
						 * those are tried first, and only in Informix mode.
						 * Either helper returning true means the scanner input
						 * was redirected and no token is produced here.
						 */
						substituted = (INFORMIX_MODE && isinformixdefine()) || isdefine();

						if (!substituted)
						{
							int			token = ScanCKeywordLookup(yytext);

							if (token >= 0)
								return token;

							base_yylval.str = mm_strdup(yytext);
							return IDENT;
						}
					}
<C>{xcstop}			{
						/* a comment terminator outside of any comment */
						mmerror(PARSE_ERROR, ET_ERROR, "nested /* ... */ comments");
					}
<C>":"				{ return ':'; }
<C>";"				{ return ';'; }
<C>","				{ return ','; }
<C>"*"				{ return '*'; }
<C>"%"				{ return '%'; }
<C>"/"				{ return '/'; }
<C>"+"				{ return '+'; }
<C>"-"				{ return '-'; }
<C>"("				{
						/* track nesting; used together with braces_open */
						parenths_open++;
						return '(';
					}
<C>")"				{
						parenths_open--;
						return ')';
					}
<C,xskip>{space}	{
						/* whitespace is copied through unchanged */
						ECHO;
					}
<C>\{				{ return '{'; }
<C>\}				{ return '}'; }
<C>\[				{ return '['; }
<C>\]				{ return ']'; }
<C>\=				{ return '='; }
<C>"->"				{ return S_MEMBER; }
<C>">>"				{ return S_RSHIFT; }
<C>"<<"				{ return S_LSHIFT; }
<C>"||"				{ return S_OR; }
<C>"&&"				{ return S_AND; }
<C>"++"				{ return S_INC; }
<C>"--"				{ return S_DEC; }
<C>"=="				{ return S_EQUAL; }
<C>"!="				{ return S_NEQUAL; }
<C>"+="				{ return S_ADD; }
<C>"-="				{ return S_SUB; }
<C>"*="				{ return S_MUL; }
<C>"/="				{ return S_DIV; }
<C>"%="				{ return S_MOD; }
<C>"->*"			{ return S_MEMPOINT; }
<C>".*"				{ return S_DOTPOINT; }
<C>{other}			{
						/* anything not matched by an earlier <C> rule */
						return S_ANYTHING;
					}
<C>{exec_sql}{define}{space}*	{
						/* the <def_ident> rules read the macro name next */
						BEGIN(def_ident);
					}
<C>{informix_special}{define}{space}*	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						BEGIN(def_ident);
					}
<C>{exec_sql}{undef}{space}*	{
						/* the <undef> rules read the macro name next */
						BEGIN(undef);
					}
<C>{informix_special}{undef}{space}*	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						BEGIN(undef);
					}
<undef>{identifier}{space}*";"	{
						struct _defines **link = &defines;
						int			last;

						/*
						 * Cut off the ";" and any whitespace in front of it.
						 * yytext holds at least one non-space character plus
						 * the ";".
						 */
						last = strlen(yytext) - 2;
						while (last > 0 && ecpg_isspace(yytext[last]))
							last--;
						yytext[last + 1] = '\0';

						/* unlink and release the first definition with that name */
						while (*link != NULL)
						{
							struct _defines *cur = *link;

							if (strcmp(yytext, cur->olddef) == 0)
							{
								*link = cur->next;
								free(cur->newdef);
								free(cur->olddef);
								free(cur);
								break;
							}
							link = &cur->next;
						}

						BEGIN(C);
					}
<undef>{other}|\n	{
						/* anything else where the macro name was expected */
						mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL UNDEF command");
						yyterminate();
					}
<C>{exec_sql}{include}{space}*	{
						/* the <incl> rules read the file name next */
						BEGIN(incl);
					}
<C>{informix_special}{include}{space}*	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						BEGIN(incl);
					}
<C,xskip>{exec_sql}{ifdef}{space}*	{
						/* open a new, so far inactive, conditional level */
						if (preproc_tos >= MAX_NESTED_IF - 1)
							mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
						++preproc_tos;
						stacked_if_value[preproc_tos].else_branch = false;
						stacked_if_value[preproc_tos].saw_active = false;
						stacked_if_value[preproc_tos].active = false;

						/* the level becomes active if the symbol IS defined */
						ifcond = true;
						BEGIN(xcond);
					}
<C,xskip>{informix_special}{ifdef}{space}*	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						/* open a new, so far inactive, conditional level */
						if (preproc_tos >= MAX_NESTED_IF - 1)
							mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
						++preproc_tos;
						stacked_if_value[preproc_tos].else_branch = false;
						stacked_if_value[preproc_tos].saw_active = false;
						stacked_if_value[preproc_tos].active = false;

						/* the level becomes active if the symbol IS defined */
						ifcond = true;
						BEGIN(xcond);
					}
<C,xskip>{exec_sql}{ifndef}{space}*	{
						/* open a new, so far inactive, conditional level */
						if (preproc_tos >= MAX_NESTED_IF - 1)
							mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
						++preproc_tos;
						stacked_if_value[preproc_tos].else_branch = false;
						stacked_if_value[preproc_tos].saw_active = false;
						stacked_if_value[preproc_tos].active = false;

						/* the level becomes active if the symbol is NOT defined */
						ifcond = false;
						BEGIN(xcond);
					}
<C,xskip>{informix_special}{ifndef}{space}*	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						/* open a new, so far inactive, conditional level */
						if (preproc_tos >= MAX_NESTED_IF - 1)
							mmfatal(PARSE_ERROR, "too many nested EXEC SQL IFDEF conditions");
						++preproc_tos;
						stacked_if_value[preproc_tos].else_branch = false;
						stacked_if_value[preproc_tos].saw_active = false;
						stacked_if_value[preproc_tos].active = false;

						/* the level becomes active if the symbol is NOT defined */
						ifcond = false;
						BEGIN(xcond);
					}
<C,xskip>{exec_sql}{elif}{space}*	{
						/* ELIF needs an open conditional that has not seen ELSE yet */
						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
						if (stacked_if_value[preproc_tos].else_branch)
							mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");

						/* evaluated like IFDEF: active if the symbol is defined */
						ifcond = true;
						BEGIN(xcond);
					}
<C,xskip>{informix_special}{elif}{space}*	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						/* ELIF needs an open conditional that has not seen ELSE yet */
						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
						if (stacked_if_value[preproc_tos].else_branch)
							mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");

						/* evaluated like IFDEF: active if the symbol is defined */
						ifcond = true;
						BEGIN(xcond);
					}
1318 :
<C,xskip>{exec_sql}{else}{space}*";"	{
						/*
						 * Only EXEC SQL ENDIF pops the stack, so a second ELSE
						 * on the same level has to be detected here.
						 */
						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
						else if (stacked_if_value[preproc_tos].else_branch)
							mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
						else
						{
							/*
							 * The ELSE branch runs only if the enclosing level
							 * is active and no earlier branch of this level was.
							 */
							bool		now_active =
								(stacked_if_value[preproc_tos - 1].active &&
								 !stacked_if_value[preproc_tos].saw_active);

							stacked_if_value[preproc_tos].else_branch = true;
							stacked_if_value[preproc_tos].active = now_active;
							stacked_if_value[preproc_tos].saw_active = true;

							if (now_active)
								BEGIN(C);
							else
								BEGIN(xskip);
						}
					}
<C,xskip>{informix_special}{else}{space}*";"	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "missing matching \"EXEC SQL IFDEF\" / \"EXEC SQL IFNDEF\"");
						else if (stacked_if_value[preproc_tos].else_branch)
							mmfatal(PARSE_ERROR, "more than one EXEC SQL ELSE");
						else
						{
							/*
							 * The ELSE branch runs only if the enclosing level
							 * is active and no earlier branch of this level was.
							 */
							bool		now_active =
								(stacked_if_value[preproc_tos - 1].active &&
								 !stacked_if_value[preproc_tos].saw_active);

							stacked_if_value[preproc_tos].else_branch = true;
							stacked_if_value[preproc_tos].active = now_active;
							stacked_if_value[preproc_tos].saw_active = true;

							if (now_active)
								BEGIN(C);
							else
								BEGIN(xskip);
						}
					}
<C,xskip>{exec_sql}{endif}{space}*";"	{
						/* close the innermost conditional level */
						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
						else
							preproc_tos--;

						/* resume according to the level that is current now */
						if (!stacked_if_value[preproc_tos].active)
							BEGIN(xskip);
						else
							BEGIN(C);
					}
<C,xskip>{informix_special}{endif}{space}*";"	{
						/* are we simulating Informix? */
						if (!INFORMIX_MODE)
						{
							/* keep only the first character and rescan the rest */
							yyless(1);
							return S_ANYTHING;
						}

						/* close the innermost conditional level */
						if (preproc_tos == 0)
							mmfatal(PARSE_ERROR, "unmatched EXEC SQL ENDIF");
						else
							preproc_tos--;

						/* resume according to the level that is current now */
						if (!stacked_if_value[preproc_tos].active)
							BEGIN(xskip);
						else
							BEGIN(C);
					}
1397 :
<xskip>{other}		{
						/* ignore: text inside an inactive conditional branch */
					}
1399 GIC 247 :
<xcond>{identifier}{space}*";"	{
						struct _defines *def;
						unsigned int last;
						bool		this_active;

						/*
						 * Cut off the ";" and any whitespace in front of it.
						 * yytext holds at least one non-space character plus
						 * the ";".
						 */
						last = strlen(yytext) - 2;
						while (last > 0 && ecpg_isspace(yytext[last]))
							last--;
						yytext[last + 1] = '\0';

						/* find the symbol; def ends up NULL if it is not defined */
						def = defines;
						while (def != NULL && strcmp(yytext, def->olddef) != 0)
							def = def->next;

						/* ifcond says whether "defined" makes the branch true */
						if (def != NULL)
							this_active = ifcond;
						else
							this_active = !ifcond;

						/*
						 * The branch is taken only if the enclosing level is
						 * active, no earlier branch of this level was taken,
						 * and the condition itself holds.
						 */
						stacked_if_value[preproc_tos].active =
							(stacked_if_value[preproc_tos - 1].active &&
							 !stacked_if_value[preproc_tos].saw_active &&
							 this_active);
						stacked_if_value[preproc_tos].saw_active |= this_active;

						if (!stacked_if_value[preproc_tos].active)
							BEGIN(xskip);
						else
							BEGIN(C);
					}

<xcond>{other}|\n	{
						/* anything else where the symbol name was expected */
						mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL IFDEF command");
						yyterminate();
					}
<def_ident>{identifier}	{
						/* keep the macro name, then collect its value in literalbuf */
						old = mm_strdup(yytext);
						startlit();
						BEGIN(def);
					}
<def_ident>{other}|\n	{
						/* anything else where the macro name was expected */
						mmfatal(PARSE_ERROR, "missing identifier in EXEC SQL DEFINE command");
						yyterminate();
					}
<def>{space}*";"	{
						struct _defines *ptr;

						/*
						 * End of EXEC SQL DEFINE: "old" holds the macro name and
						 * literalbuf the collected replacement text.
						 *
						 * If the name is already defined, replace its value in
						 * place and stop searching.  Stopping matters: running
						 * the loop to the end would leave ptr NULL and make the
						 * code below push a second entry for the same name,
						 * which a later UNDEF (removing only one entry) would
						 * not fully undo.
						 */
						for (ptr = defines; ptr != NULL; ptr = ptr->next)
						{
							if (strcmp(old, ptr->olddef) == 0)
							{
								free(ptr->newdef);
								ptr->newdef = mm_strdup(literalbuf);

								/* the existing entry keeps its own copy of the name */
								free(old);
								old = NULL;
								break;
							}
						}

						/* not found: create the initial definition at the list head */
						if (ptr == NULL)
						{
							struct _defines *this;

							this = (struct _defines *) mm_alloc(sizeof(struct _defines));

							/* the new entry takes ownership of "old" */
							this->olddef = old;
							this->newdef = mm_strdup(literalbuf);
							this->next = defines;
							this->used = NULL;
							defines = this;
						}

						BEGIN(C);
					}
<def>[^;]			{
						/* accumulate the macro's replacement text */
						addlit(yytext, yyleng);
					}
<incl>\<[^\>]+\>{space}*";"?	{
						/* <file> form */
						parse_include();
					}
<incl>{dquote}{xdinside}{dquote}{space}*";"?	{
						/* "file" form */
						parse_include();
					}
<incl>[^;\<\>\"]+";"	{
						/* bare file name terminated by ";" */
						parse_include();
					}
<incl>{other}|\n	{
						/* nothing usable as a file name */
						mmfatal(PARSE_ERROR, "syntax error in EXEC SQL INCLUDE command");
						yyterminate();
					}
1482 ECB :
<<EOF>>				{
						if (yy_buffer == NULL)
						{
							/* end of the outermost input: conditionals must be closed */
							if (preproc_tos > 0)
							{
								preproc_tos = 0;
								mmfatal(PARSE_ERROR, "missing \"EXEC SQL ENDIF;\"");
							}
							yyterminate();
						}
						else
						{
							/* end of a stacked input: return to the saved buffer */
							struct _yy_buffer *saved = yy_buffer;
							struct _defines *def;
							int			name_diff;

							/* a define expanded through this buffer may be used again */
							for (def = defines; def != NULL; def = def->next)
							{
								if (def->used == saved)
								{
									def->used = NULL;
									break;
								}
							}

							if (yyin != NULL)
								fclose(yyin);

							yy_delete_buffer(YY_CURRENT_BUFFER);
							yy_switch_to_buffer(saved->buffer);

							yylineno = saved->lineno;

							/* the filename needs to be output only if it changes here */
							name_diff = strcmp(input_filename, saved->filename);

							free(input_filename);
							input_filename = saved->filename;

							/* pop the stack entry */
							yy_buffer = saved->next;
							free(saved);

							if (name_diff != 0)
								output_line_number();
						}
					}
1528 :
<INITIAL>{other}|\n	{
						/* lex_init() starts the scanner in state C, so this should never match */
						mmfatal(PARSE_ERROR, "internal error: unreachable state; please report this to <%s>", PACKAGE_BUGREPORT);
					}
1530 LBC 0 :
1531 ECB : %%
1532 UIC 0 :
1533 : /* LCOV_EXCL_STOP */
1534 ECB :
1535 : void
1536 : lex_init(void)
1537 CBC 64 : {
1538 ECB : braces_open = 0;
1539 GIC 64 : parenths_open = 0;
1540 CBC 64 : current_function = NULL;
1541 GIC 64 :
1542 : yylineno = 1;
1543 CBC 64 :
1544 : /* initialize state for if/else/endif */
1545 ECB : preproc_tos = 0;
1546 CBC 64 : stacked_if_value[preproc_tos].active = true;
1547 GIC 64 : stacked_if_value[preproc_tos].saw_active = true;
1548 CBC 64 : stacked_if_value[preproc_tos].else_branch = false;
1549 64 :
1550 : /* initialize literal buffer to a reasonable but expansible size */
1551 ECB : if (literalbuf == NULL)
1552 CBC 64 : {
1553 : literalalloc = 1024;
1554 GIC 64 : literalbuf = (char *) mm_alloc(literalalloc);
1555 64 : }
1556 ECB : startlit();
1557 GBC 64 :
1558 : BEGIN(C);
1559 64 : }
1560 GIC 64 :
1561 : static void
1562 : addlit(char *ytext, int yleng)
1563 23378 : {
1564 ECB : /* enlarge buffer if needed */
1565 : if ((literallen+yleng) >= literalalloc)
1566 CBC 23378 : {
1567 ECB : do
1568 : literalalloc *= 2;
1569 UIC 0 : while ((literallen+yleng) >= literalalloc);
1570 LBC 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1571 UIC 0 : }
1572 : /* append new data, add trailing null */
1573 ECB : memcpy(literalbuf+literallen, ytext, yleng);
1574 CBC 23378 : literallen += yleng;
1575 23378 : literalbuf[literallen] = '\0';
1576 23378 : }
1577 GIC 23378 :
1578 : static void
1579 ECB : addlitchar(unsigned char ychar)
1580 UIC 0 : {
1581 ECB : /* enlarge buffer if needed */
1582 : if ((literallen+1) >= literalalloc)
1583 UIC 0 : {
1584 ECB : literalalloc *= 2;
1585 UIC 0 : literalbuf = (char *) realloc(literalbuf, literalalloc);
1586 LBC 0 : }
1587 ECB : /* append new data, add trailing null */
1588 : literalbuf[literallen] = ychar;
1589 UIC 0 : literallen += 1;
1590 LBC 0 : literalbuf[literallen] = '\0';
1591 UIC 0 : }
1592 0 :
1593 ECB : /*
1594 : * Process {decinteger}, {hexinteger}, etc. Note this will also do the right
1595 : * thing with {numeric}, ie digits and a decimal point.
1596 EUB : */
1597 : static int
1598 : process_integer_literal(const char *token, YYSTYPE *lval, int base)
1599 GIC 1181 : {
1600 : int val;
1601 ECB : char *endptr;
1602 :
1603 : errno = 0;
1604 GNC 1181 : val = strtoint(base == 10 ? token : token + 2, &endptr, base);
1605 GIC 1181 : if (*endptr != '\0' || errno == ERANGE)
1606 1181 : {
1607 EUB : /* integer too large (or contains decimal pt), treat it as a float */
1608 : lval->str = mm_strdup(token);
1609 GIC 6 : return FCONST;
1610 GBC 6 : }
1611 : lval->ival = val;
1612 1175 : return ICONST;
1613 1175 : }
1614 :
1615 : static void
1616 EUB : parse_include(void)
1617 GBC 85 : {
1618 EUB : /* got the include file name */
1619 : struct _yy_buffer *yb;
1620 : struct _include_path *ip;
1621 : char inc_file[MAXPGPATH];
1622 : unsigned int i;
1623 :
1624 : yb = mm_alloc(sizeof(struct _yy_buffer));
1625 GIC 85 :
1626 ECB : yb->buffer = YY_CURRENT_BUFFER;
1627 GIC 85 : yb->lineno = yylineno;
1628 85 : yb->filename = input_filename;
1629 85 : yb->next = yy_buffer;
1630 85 :
1631 ECB : yy_buffer = yb;
1632 CBC 85 :
1633 ECB : /*
1634 : * skip the ";" if there is one and trailing whitespace. Note that
1635 : * yytext contains at least one non-space character plus the ";"
1636 : */
1637 : for (i = strlen(yytext)-2;
1638 GIC 85 : i > 0 && ecpg_isspace(yytext[i]);
1639 CBC 86 : i--)
1640 1 : ;
1641 :
1642 : if (yytext[i] == ';')
1643 GIC 85 : i--;
1644 LBC 0 :
1645 : yytext[i+1] = '\0';
1646 GIC 85 :
1647 : yyin = NULL;
1648 85 :
1649 : /* If file name is enclosed in '"' remove these and look only in '.' */
1650 : /* Informix does look into all include paths though, except filename starts with '/' */
1651 : if (yytext[0] == '"' && yytext[i] == '"' &&
1652 CBC 85 : ((compat != ECPG_COMPAT_INFORMIX && compat != ECPG_COMPAT_INFORMIX_SE) || yytext[1] == '/'))
1653 UIC 0 : {
1654 ECB : yytext[i] = '\0';
1655 LBC 0 : memmove(yytext, yytext+1, strlen(yytext));
1656 0 :
1657 ECB : strlcpy(inc_file, yytext, sizeof(inc_file));
1658 UIC 0 : yyin = fopen(inc_file, "r");
1659 LBC 0 : if (!yyin)
1660 UIC 0 : {
1661 : if (strlen(inc_file) <= 2 || strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1662 0 : {
1663 : strcat(inc_file, ".h");
1664 0 : yyin = fopen(inc_file, "r");
1665 LBC 0 : }
1666 ECB : }
1667 :
1668 : }
1669 : else
1670 : {
1671 EUB : if ((yytext[0] == '"' && yytext[i] == '"') || (yytext[0] == '<' && yytext[i] == '>'))
1672 GIC 85 : {
1673 ECB : yytext[i] = '\0';
1674 GIC 2 : memmove(yytext, yytext+1, strlen(yytext));
1675 CBC 2 : }
1676 :
1677 : for (ip = include_paths; yyin == NULL && ip != NULL; ip = ip->next)
1678 GIC 232 : {
1679 ECB : if (strlen(ip->path) + strlen(yytext) + 4 > MAXPGPATH)
1680 GBC 147 : {
1681 : fprintf(stderr, _("Error: include path \"%s/%s\" is too long on line %d, skipping\n"), ip->path, yytext, yylineno);
1682 UBC 0 : continue;
1683 0 : }
1684 : snprintf (inc_file, sizeof(inc_file), "%s/%s", ip->path, yytext);
1685 GBC 147 : yyin = fopen(inc_file, "r");
1686 147 : if (!yyin)
1687 147 : {
1688 : if (strcmp(inc_file + strlen(inc_file) - 2, ".h") != 0)
1689 136 : {
1690 : strcat(inc_file, ".h");
1691 133 : yyin = fopen(inc_file, "r");
1692 133 : }
1693 : }
1694 : /* if the command was "include_next" we have to disregard the first hit */
1695 : if (yyin && include_next)
1696 GIC 147 : {
1697 : fclose (yyin);
1698 UIC 0 : yyin = NULL;
1699 LBC 0 : include_next = false;
1700 UIC 0 : }
1701 ECB : }
1702 : }
1703 : if (!yyin)
1704 GIC 85 : mmfatal(NO_INCLUDE_FILE, "could not open include file \"%s\" on line %d", yytext, yylineno);
1705 LBC 0 :
1706 : input_filename = mm_strdup(inc_file);
1707 CBC 85 : yy_switch_to_buffer(yy_create_buffer(yyin,YY_BUF_SIZE));
1708 GIC 85 : yylineno = 1;
1709 GBC 85 : output_line_number();
1710 85 :
1711 : BEGIN(C);
1712 CBC 85 : }
1713 85 :
/*
 * ecpg_isspace() --- does the flex scanner treat this character as whitespace?
 *
 * True for space, tab, newline, carriage return and form feed; false for
 * every other character.
 */
static bool
ecpg_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
			return true;
		default:
			return false;
	}
}
1728 :
1729 : static bool isdefine(void)
1730 GIC 13737 : {
1731 ECB : struct _defines *ptr;
1732 EUB :
1733 : /* is it a define? */
1734 ECB : for (ptr = defines; ptr; ptr = ptr->next)
1735 CBC 62635 : {
1736 ECB : if (strcmp(yytext, ptr->olddef) == 0 && ptr->used == NULL)
1737 CBC 48978 : {
1738 : struct _yy_buffer *yb;
1739 ECB :
1740 : yb = mm_alloc(sizeof(struct _yy_buffer));
1741 GIC 80 :
1742 : yb->buffer = YY_CURRENT_BUFFER;
1743 80 : yb->lineno = yylineno;
1744 80 : yb->filename = mm_strdup(input_filename);
1745 80 : yb->next = yy_buffer;
1746 CBC 80 :
1747 : ptr->used = yy_buffer = yb;
1748 80 :
1749 ECB : yy_scan_string(ptr->newdef);
1750 CBC 80 : return true;
1751 80 : }
1752 : }
1753 ECB :
1754 : return false;
1755 GIC 13657 : }
1756 :
1757 ECB : static bool isinformixdefine(void)
1758 GIC 1732 : {
1759 : const char *new = NULL;
1760 1732 :
1761 : if (strcmp(yytext, "dec_t") == 0)
1762 CBC 1732 : new = "decimal";
1763 GIC 1 : else if (strcmp(yytext, "intrvl_t") == 0)
1764 CBC 1731 : new = "interval";
1765 UIC 0 : else if (strcmp(yytext, "dtime_t") == 0)
1766 GIC 1731 : new = "timestamp";
1767 UIC 0 :
1768 ECB : if (new)
1769 GIC 1732 : {
1770 ECB : struct _yy_buffer *yb;
1771 :
1772 : yb = mm_alloc(sizeof(struct _yy_buffer));
1773 CBC 1 :
1774 : yb->buffer = YY_CURRENT_BUFFER;
1775 1 : yb->lineno = yylineno;
1776 GIC 1 : yb->filename = mm_strdup(input_filename);
1777 CBC 1 : yb->next = yy_buffer;
1778 1 : yy_buffer = yb;
1779 GIC 1 :
1780 : yy_scan_string(new);
1781 1 : return true;
1782 CBC 1 : }
1783 :
1784 : return false;
1785 1731 : }
|