LCOV - differential code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 80.7 % 565 456 54 45 10 40 233 19 164 59 235 3
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 18 18 18 18
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : %top{
       2                 : /*-------------------------------------------------------------------------
       3                 :  *
       4                 :  * psqlscan.l
       5                 :  *    lexical scanner for SQL commands
       6                 :  *
       7                 :  * This lexer used to be part of psql, and that heritage is reflected in
       8                 :  * the file name as well as function and typedef names, though it can now
       9                 :  * be used by other frontend programs as well.  It's also possible to extend
      10                 :  * this lexer with a compatible add-on lexer to handle program-specific
      11                 :  * backslash commands.
      12                 :  *
      13                 :  * This code is mainly concerned with determining where the end of a SQL
      14                 :  * statement is: we are looking for semicolons that are not within quotes,
      15                 :  * comments, or parentheses.  The most reliable way to handle this is to
      16                 :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17                 :  * below are (except for a few) the same as the backend's, but their actions
      18                 :  * are just ECHO whereas the backend's actions generally do other things.
      19                 :  *
      20                 :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21                 :  *
      22                 :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23                 :  *
      24                 :  * See psqlscan_int.h for additional commentary.
      25                 :  *
      26                 :  *
      27                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      28                 :  * Portions Copyright (c) 1994, Regents of the University of California
      29                 :  *
      30                 :  * IDENTIFICATION
      31                 :  *    src/fe_utils/psqlscan.l
      32                 :  *
      33                 :  *-------------------------------------------------------------------------
      34                 :  */
      35                 : #include "postgres_fe.h"
      36                 : 
      37                 : #include "common/logging.h"
      38                 : #include "fe_utils/psqlscan.h"
      39                 : 
      40                 : #include "libpq-fe.h"
      41                 : }
      42                 : 
      43                 : %{
      44                 : 
      45                 : /* LCOV_EXCL_START */
      46                 : 
      47                 : #include "fe_utils/psqlscan_int.h"
      48                 : 
      49                 : /*
      50                 :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51                 :  * doesn't presently make use of that argument, so just declare it as int.
      52                 :  */
      53                 : typedef int YYSTYPE;
      54                 : 
      55                 : /*
      56                 :  * Set the type of yyextra; we use it as a pointer back to the containing
      57                 :  * PsqlScanState.
      58                 :  */
      59                 : #define YY_EXTRA_TYPE PsqlScanState
      60                 : 
      61                 : 
      62                 : /* Return values from yylex() */
      63                 : #define LEXRES_EOL          0   /* end of input */
      64                 : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      65                 : #define LEXRES_BACKSLASH    2   /* backslash command start */
      66                 : 
      67                 : 
      68                 : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      69                 : 
      70                 : /*
      71                 :  * Work around a bug in flex 2.5.35: it emits a couple of functions that
      72                 :  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
      73                 :  * this would cause warnings.  Providing our own declarations should be
      74                 :  * harmless even when the bug gets fixed.
      75                 :  */
      76                 : extern int  psql_yyget_column(yyscan_t yyscanner);
      77                 : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
      78                 : 
      79                 : %}
      80                 : 
      81                 : %option reentrant
      82                 : %option bison-bridge
      83                 : %option 8bit
      84                 : %option never-interactive
      85                 : %option nodefault
      86                 : %option noinput
      87                 : %option nounput
      88                 : %option noyywrap
      89                 : %option warn
      90                 : %option prefix="psql_yy"
      91                 : 
      92                 : /*
      93                 :  * All of the following definitions and rules should exactly match
      94                 :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      95                 :  * The rule bodies are just ECHO as opposed to what the backend does,
      96                 :  * however.  (But be sure to duplicate code that affects the lexing process,
      97                 :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      98                 :  * whereas scan.l has a separate one for each exclusive state.
      99                 :  */
     100                 : 
     101                 : /*
     102                 :  * OK, here is a short description of lex/flex rules behavior.
     103                 :  * The longest pattern which matches an input string is always chosen.
     104                 :  * For equal-length patterns, the first occurring in the rules list is chosen.
     105                 :  * INITIAL is the starting state, to which all non-conditional rules apply.
     106                 :  * Exclusive states change parsing rules while the state is active.  When in
     107                 :  * an exclusive state, only those rules defined for that state apply.
     108                 :  *
     109                 :  * We use exclusive states for quoted strings, extended comments,
     110                 :  * and to eliminate parsing troubles for numeric strings.
     111                 :  * Exclusive states:
     112                 :  *  <xb> bit string literal
     113                 :  *  <xc> extended C-style comments
     114                 :  *  <xd> delimited identifiers (double-quoted identifiers)
     115                 :  *  <xh> hexadecimal byte string
     116                 :  *  <xq> standard quoted strings
     117                 :  *  <xqs> quote stop (detect continued strings)
     118                 :  *  <xe> extended quoted strings (support backslash escape sequences)
     119                 :  *  <xdolq> $foo$ quoted strings
     120                 :  *  <xui> quoted identifier with Unicode escapes
     121                 :  *  <xus> quoted string with Unicode escapes
     122                 :  *
     123                 :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     124                 :  * no need to distinguish it from <xe> state, and no good way to get out
     125                 :  * of it in error cases.  The backend just throws yyerror() in those
     126                 :  * cases, but that's not an option here.
     127                 :  */
     128                 : 
     129                 : %x xb
     130                 : %x xc
     131                 : %x xd
     132                 : %x xh
     133                 : %x xq
     134                 : %x xqs
     135                 : %x xe
     136                 : %x xdolq
     137                 : %x xui
     138                 : %x xus
     139                 : 
     140                 : /*
     141                 :  * In order to make the world safe for Windows and Mac clients as well as
     142                 :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     143                 :  * sequence will be seen as two successive newlines, but that doesn't cause
     144                 :  * any problems.  Comments that start with -- and extend to the next
     145                 :  * newline are treated as equivalent to a single whitespace character.
     146                 :  *
     147                 :  * NOTE a fine point: if there is no newline following --, we will absorb
     148                 :  * everything to the end of the input as a comment.  This is correct.  Older
     149                 :  * versions of Postgres failed to recognize -- as a comment if the input
     150                 :  * did not end with a newline.
     151                 :  *
     152                 :  * XXX perhaps \f (formfeed) should be treated as a newline as well?
     153                 :  *
     154                 :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     155                 :  * to agree.
     156                 :  */
     157                 : 
     158                 : space           [ \t\n\r\f]
     159                 : horiz_space     [ \t\f]
     160                 : newline         [\n\r]
     161                 : non_newline     [^\n\r]
     162                 : 
     163                 : comment         ("--"{non_newline}*)
     164                 : 
     165                 : whitespace      ({space}+|{comment})
     166                 : 
     167                 : /*
     168                 :  * SQL requires at least one newline in the whitespace separating
     169                 :  * string literals that are to be concatenated.  Silly, but who are we
     170                 :  * to argue?  Note that {whitespace_with_newline} should not have * after
     171                 :  * it, whereas {whitespace} should generally have a * after it...
     172                 :  */
     173                 : 
     174                 : special_whitespace      ({space}+|{comment}{newline})
     175                 : horiz_whitespace        ({horiz_space}|{comment})
     176                 : whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
     177                 : 
     178                 : quote           '
     179                 : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     180                 : quotecontinue   {whitespace_with_newline}{quote}
     181                 : 
     182                 : /*
     183                 :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     184                 :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     185                 :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     186                 :  * to see if there's another dash --- which would start a {comment} and thus
     187                 :  * allow continuation of the {quotecontinue} token.
     188                 :  */
     189                 : quotecontinuefail   {whitespace}*"-"?
     190                 : 
     191                 : /* Bit string
     192                 :  * It is tempting to scan the string for only those characters
     193                 :  * which are allowed. However, this leads to silently swallowed
     194                 :  * characters if illegal characters are included in the string.
     195                 :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     196                 :  * as a zero-length string, and the ABCD' is lost!
     197                 :  * Better to pass the string forward and let the input routines
     198                 :  * validate the contents.
     199                 :  */
     200                 : xbstart         [bB]{quote}
     201                 : xbinside        [^']*
     202                 : 
     203                 : /* Hexadecimal byte string */
     204                 : xhstart         [xX]{quote}
     205                 : xhinside        [^']*
     206                 : 
     207                 : /* National character */
     208                 : xnstart         [nN]{quote}
     209                 : 
     210                 : /* Quoted string that allows backslash escapes */
     211                 : xestart         [eE]{quote}
     212                 : xeinside        [^\\']+
     213                 : xeescape        [\\][^0-7]
     214                 : xeoctesc        [\\][0-7]{1,3}
     215                 : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     216                 : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     217                 : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     218                 : 
     219                 : /* Extended quote
     220                 :  * xqdouble implements embedded quote, ''''
     221                 :  */
     222                 : xqstart         {quote}
     223                 : xqdouble        {quote}{quote}
     224                 : xqinside        [^']+
     225                 : 
     226                 : /* $foo$ style quotes ("dollar quoting")
     227                 :  * The quoted string starts with $foo$ where "foo" is an optional string
     228                 :  * in the form of an identifier, except that it may not contain "$",
     229                 :  * and extends to the first occurrence of an identical string.
     230                 :  * There is *no* processing of the quoted text.
     231                 :  *
     232                 :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     233                 :  * fails to match its trailing "$".
     234                 :  */
     235                 : dolq_start      [A-Za-z\200-\377_]
     236                 : dolq_cont       [A-Za-z\200-\377_0-9]
     237                 : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     238                 : dolqfailed      \${dolq_start}{dolq_cont}*
     239                 : dolqinside      [^$]+
     240                 : 
     241                 : /* Double quote
     242                 :  * Allows embedded spaces and other special characters into identifiers.
     243                 :  */
     244                 : dquote          \"
     245                 : xdstart         {dquote}
     246                 : xdstop          {dquote}
     247                 : xddouble        {dquote}{dquote}
     248                 : xdinside        [^"]+
     249                 : 
     250                 : /* Quoted identifier with Unicode escapes */
     251                 : xuistart        [uU]&{dquote}
     252                 : 
     253                 : /* Quoted string with Unicode escapes */
     254                 : xusstart        [uU]&{quote}
     255                 : 
     256                 : /* error rule to avoid backup */
     257                 : xufailed        [uU]&
     258                 : 
     259                 : 
     260                 : /* C-style comments
     261                 :  *
     262                 :  * The "extended comment" syntax closely resembles allowable operator syntax.
     263                 :  * The tricky part here is to get lex to recognize a string starting with
     264                 :  * slash-star as a comment, when interpreting it as an operator would produce
     265                 :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     266                 :  * have something like plus-slash-star, lex will think this is a 3-character
     267                 :  * operator whereas we want to see it as a + operator and a comment start.
     268                 :  * The solution is two-fold:
     269                 :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     270                 :  *    {operator} would. Then the tie-breaker (first matching rule of same
     271                 :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     272                 :  *    in case it contains a star-slash that should terminate the comment.
     273                 :  * 2. In the operator rule, check for slash-star within the operator, and
     274                 :  *    if found throw it back with yyless().  This handles the plus-slash-star
     275                 :  *    problem.
     276                 :  * Dash-dash comments have similar interactions with the operator rule.
     277                 :  */
     278                 : xcstart         \/\*{op_chars}*
     279                 : xcstop          \*+\/
     280                 : xcinside        [^*/]+
     281                 : 
     282                 : ident_start     [A-Za-z\200-\377_]
     283                 : ident_cont      [A-Za-z\200-\377_0-9\$]
     284                 : 
     285                 : identifier      {ident_start}{ident_cont}*
     286                 : 
     287                 : /* Assorted special-case operators and operator-like tokens */
     288                 : typecast        "::"
     289                 : dot_dot         \.\.
     290                 : colon_equals    ":="
     291                 : 
     292                 : /*
     293                 :  * These operator-like tokens (unlike the above ones) also match the {operator}
     294                 :  * rule, which means that they might be overridden by a longer match if they
     295                 :  * are followed by a comment start or a + or - character. Accordingly, if you
     296                 :  * add to this list, you must also add corresponding code to the {operator}
     297                 :  * block to return the correct token in such cases. (This is not needed in
     298                 :  * psqlscan.l since the token value is ignored there.)
     299                 :  */
     300                 : equals_greater  "=>"
     301                 : less_equals     "<="
     302                 : greater_equals  ">="
     303                 : less_greater    "<>"
     304                 : not_equals      "!="
     305                 : 
     306                 : /*
     307                 :  * "self" is the set of chars that should be returned as single-character
     308                 :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     309                 :  * which can be one or more characters long (but if a single-char token
     310                 :  * appears in the "self" set, it is not to be returned as an Op).  Note
     311                 :  * that the sets overlap, but each has some chars that are not in the other.
     312                 :  *
     313                 :  * If you change either set, adjust the character lists appearing in the
     314                 :  * rule for "operator"!
     315                 :  */
     316                 : self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
     317                 : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     318                 : operator        {op_chars}+
     319                 : 
     320                 : /*
     321                 :  * Numbers
     322                 :  *
     323                 :  * Unary minus is not part of a number here.  Instead we pass it separately to
     324                 :  * the parser, and there it gets coerced via doNegate().
     325                 :  *
     326                 :  * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     327                 :  *
     328                 :  * {realfail} is added to prevent the need for scanner
     329                 :  * backup when the {real} rule fails to match completely.
     330                 :  */
     331                 : decdigit        [0-9]
     332                 : hexdigit        [0-9A-Fa-f]
     333                 : octdigit        [0-7]
     334                 : bindigit        [0-1]
     335                 : 
     336                 : decinteger      {decdigit}(_?{decdigit})*
     337                 : hexinteger      0[xX](_?{hexdigit})+
     338                 : octinteger      0[oO](_?{octdigit})+
     339                 : bininteger      0[bB](_?{bindigit})+
     340                 : 
     341                 : hexfail         0[xX]_?
     342                 : octfail         0[oO]_?
     343                 : binfail         0[bB]_?
     344                 : 
     345                 : numeric         (({decinteger}\.{decinteger}?)|(\.{decinteger}))
     346                 : numericfail     {decdigit}+\.\.
     347                 : 
     348                 : real            ({decinteger}|{numeric})[Ee][-+]?{decinteger}
     349                 : realfail        ({decinteger}|{numeric})[Ee][-+]
     350                 : 
     351                 : decinteger_junk {decinteger}{ident_start}
     352                 : hexinteger_junk {hexinteger}{ident_start}
     353                 : octinteger_junk {octinteger}{ident_start}
     354                 : bininteger_junk {bininteger}{ident_start}
     355                 : numeric_junk    {numeric}{ident_start}
     356                 : real_junk       {real}{ident_start}
     357                 : 
     358                 : param           \${decinteger}
     359                 : param_junk      \${decinteger}{ident_start}
     360                 : 
     361                 : /* psql-specific: characters allowed in variable names */
     362                 : variable_char   [A-Za-z\200-\377_0-9]
     363                 : 
     364                 : other           .
     365                 : 
     366                 : /*
     367                 :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     368                 :  * Other quoted strings must allow some special characters such as single-quote
     369                 :  *  and newline.
     370                 :  * Embedded single-quotes are implemented both in the SQL standard
     371                 :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     372                 :  *  of escaped-quote "\'".
     373                 :  * Other embedded escaped characters are matched explicitly and the leading
     374                 :  *  backslash is dropped from the string.
     375                 :  * Note that xcstart must appear before operator, as explained above!
     376                 :  *  Also whitespace (comment) must appear before operator.
     377                 :  */
     378                 : 
     379                 : %%
     380                 : 
     381                 : %{
     382                 :         /* Declare some local variables inside yylex(), for convenience */
     383                 :         PsqlScanState cur_state = yyextra;
     384 GIC      425182 :         PQExpBuffer output_buf = cur_state->output_buf;
     385          425182 : 
     386                 :         /*
     387                 :          * Force flex into the state indicated by start_state.  This has a
     388                 :          * couple of purposes: it lets some of the functions below set a new
     389                 :          * starting state without ugly direct access to flex variables, and it
     390                 :          * allows us to transition from one flex lexer to another so that we
     391                 :          * can lex different parts of the source string using separate lexers.
     392                 :          */
     393                 :         BEGIN(cur_state->start_state);
     394          425182 : %}
     395                 : 
     396                 : {whitespace}    {
     397                 :                     /*
     398                 :                      * Note that the whitespace rule includes both true
     399 ECB             :                      * whitespace and single-line ("--" style) comments.
     400                 :                      * We suppress whitespace until we have collected some
     401                 :                      * non-whitespace data.  (This interacts with some
     402                 :                      * decisions in MainLoop(); see there for details.)
     403                 :                      */
     404                 :                     if (output_buf->len > 0)
     405 GIC     1061569 :                         ECHO;
     406          997482 :                 }
     407                 : 
     408         1061569 : {xcstart}       {
     409 CBC         306 :                     cur_state->xcdepth = 0;
     410 GIC         306 :                     BEGIN(xc);
     411             306 :                     /* Put back any characters past slash-star; see above */
     412                 :                     yyless(2);
     413             306 :                     ECHO;
     414             306 :                 }
     415                 : 
     416             306 : <xc>{
     417                 : {xcstart}       {
     418               9 :                     cur_state->xcdepth++;
     419               9 :                     /* Put back any characters past slash-star; see above */
     420 ECB             :                     yyless(2);
     421 CBC           9 :                     ECHO;
     422 GIC           9 :                 }
     423 ECB             : 
     424 CBC           9 : {xcstop}        {
     425             315 :                     if (cur_state->xcdepth <= 0)
     426             315 :                         BEGIN(INITIAL);
     427 GIC         306 :                     else
     428 ECB             :                         cur_state->xcdepth--;
     429 CBC           9 :                     ECHO;
     430 GIC         315 :                 }
     431 ECB             : 
     432 GIC         315 : {xcinside}      {
     433 CBC         716 :                     ECHO;
     434             716 :                 }
     435                 : 
     436             716 : {op_chars}      {
     437             193 :                     ECHO;
     438 GIC         193 :                 }
     439 ECB             : 
     440 CBC         193 : \*+             {
     441 LBC           0 :                     ECHO;
     442               0 :                 }
     443                 : } /* <xc> */
     444               0 : 
     445 ECB             : {xbstart}       {
     446 GIC         369 :                     BEGIN(xb);
     447 CBC         369 :                     ECHO;
     448             369 :                 }
     449 ECB             : <xh>{xhinside}    |
     450 GIC         369 : <xb>{xbinside}    {
     451 CBC        1997 :                     ECHO;
     452            1997 :                 }
     453 ECB             : 
     454 GIC        1997 : {xhstart}       {
     455 CBC        1643 :                     /* Hexadecimal bit type.
     456 EUB             :                      * At some point we should simply pass the string
     457                 :                      * forward to the parser and label it there.
     458                 :                      * In the meantime, place a leading "x" on the string
     459                 :                      * to mark it for the input routine as a hex string.
     460                 :                      */
     461 ECB             :                     BEGIN(xh);
     462 CBC        1643 :                     ECHO;
     463            1643 :                 }
     464                 : 
     465            1643 : {xnstart}       {
     466 LBC           0 :                     yyless(1);  /* eat only 'n' this time */
     467               0 :                     ECHO;
     468 UIC           0 :                 }
     469 ECB             : 
     470 LBC           0 : {xqstart}       {
     471 GIC       89353 :                     if (cur_state->std_strings)
     472           89353 :                         BEGIN(xq);
     473           89299 :                     else
     474                 :                         BEGIN(xe);
     475              54 :                     ECHO;
     476           89353 :                 }
     477 ECB             : {xestart}       {
     478 CBC       89353 :                     BEGIN(xe);
     479 GIC         560 :                     ECHO;
     480 CBC         560 :                 }
     481 EUB             : {xusstart}      {
     482 GBC         560 :                     BEGIN(xus);
     483             136 :                     ECHO;
     484 GIC         136 :                 }
     485 EUB             : 
     486 CBC         136 : <xb,xh,xq,xe,xus>{quote} {
     487           92061 :                     /*
     488 ECB             :                      * When we are scanning a quoted string and see an end
     489                 :                      * quote, we must look ahead for a possible continuation.
     490                 :                      * If we don't see one, we know the end quote was in fact
     491                 :                      * the end of the string.  To reduce the lexer table size,
     492                 :                      * we use a single "xqs" state to do the lookahead for all
     493                 :                      * types of strings.
     494                 :                      */
     495                 :                     cur_state->state_before_str_stop = YYSTATE;
     496 GIC       92061 :                     BEGIN(xqs);
     497 CBC       92061 :                     ECHO;
     498           92061 :                 }
     499 ECB             : <xqs>{quotecontinue} {
     500 GIC       92061 :                     /*
     501 LBC           0 :                      * Found a quote continuation, so return to the in-quote
     502 ECB             :                      * state and continue scanning the literal.  Nothing is
     503                 :                      * added to the literal's contents.
     504                 :                      */
     505                 :                     BEGIN(cur_state->state_before_str_stop);
     506 UIC           0 :                     ECHO;
     507               0 :                 }
     508                 : <xqs>{quotecontinuefail} |
     509               0 : <xqs>{other}  {
     510 GIC       91762 :                     /*
     511 ECB             :                      * Failed to see a quote continuation.  Throw back
     512                 :                      * everything after the end quote and return to INITIAL
     513                 :                      * state; the string's text has already been echoed.
     514                 :                      */
     515                 :                     yyless(0);
     516 GBC       91762 :                     BEGIN(INITIAL);
     517 GIC       91762 :                     /* There's nothing to echo ... */
     518                 :                 }
     519                 : 
     520           91762 : <xq,xe,xus>{xqdouble} {
     521 GBC        3085 :                     ECHO;
     522            3085 :                 }
     523                 : <xq,xus>{xqinside}  {
     524            3085 :                     ECHO;
     525 CBC       94455 :                 }
     526                 : <xe>{xeinside}  {
     527 GIC       94455 :                     ECHO;
     528            1309 :                 }
     529                 : <xe>{xeunicode} {
     530            1309 :                     ECHO;
     531 CBC          78 :                 }
     532 ECB             : <xe>{xeunicodefail}   {
     533 GIC          78 :                     ECHO;
     534               6 :                 }
     535 ECB             : <xe>{xeescape}  {
     536 CBC           6 :                     ECHO;
     537             715 :                 }
     538                 : <xe>{xeoctesc}  {
     539             715 :                     ECHO;
     540              12 :                 }
     541                 : <xe>{xehexesc}  {
     542              12 :                     ECHO;
     543               6 :                 }
     544                 : <xe>.         {
     545               6 :                     /* This is only needed for \ just before EOF */
     546 LBC           0 :                     ECHO;
     547 UIC           0 :                 }
     548 ECB             : 
     549 LBC           0 : {dolqdelim}     {
     550 GIC        3220 :                     cur_state->dolqstart = pg_strdup(yytext);
     551 CBC        3220 :                     BEGIN(xdolq);
     552            3220 :                     ECHO;
     553 GIC        3220 :                 }
     554 ECB             : {dolqfailed}    {
     555 CBC        3220 :                     /* throw back all but the initial "$" */
     556 UIC           0 :                     yyless(1);
     557 LBC           0 :                     ECHO;
     558               0 :                 }
     559                 : <xdolq>{dolqdelim} {
     560               0 :                     if (strcmp(yytext, cur_state->dolqstart) == 0)
     561 GBC        3406 :                     {
     562 EUB             :                         free(cur_state->dolqstart);
     563 GIC        3220 :                         cur_state->dolqstart = NULL;
     564 GBC        3220 :                         BEGIN(INITIAL);
     565 CBC        3220 :                     }
     566 ECB             :                     else
     567                 :                     {
     568                 :                         /*
     569                 :                          * When we fail to match $...$ to dolqstart, transfer
     570                 :                          * the $... part to the output, but put back the final
     571 EUB             :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     572                 :                          */
     573                 :                         yyless(yyleng - 1);
     574 GIC         186 :                     }
     575 EUB             :                     ECHO;
     576 CBC        3406 :                 }
     577                 : <xdolq>{dolqinside} {
     578            3406 :                     ECHO;
     579           16945 :                 }
     580 ECB             : <xdolq>{dolqfailed} {
     581 GIC       16945 :                     ECHO;
     582             541 :                 }
     583                 : <xdolq>.      {
     584             541 :                     /* This is only needed for $ inside the quoted text */
     585            1132 :                     ECHO;
     586            1132 :                 }
     587                 : 
     588            1132 : {xdstart}       {
     589 CBC        4289 :                     BEGIN(xd);
     590 GIC        4289 :                     ECHO;
     591 CBC        4289 :                 }
     592                 : {xuistart}      {
     593            4289 :                     BEGIN(xui);
     594              18 :                     ECHO;
     595 GIC          18 :                 }
     596 ECB             : <xd>{xdstop}  {
     597 CBC          18 :                     BEGIN(INITIAL);
     598 GIC        4289 :                     ECHO;
     599 CBC        4289 :                 }
     600 ECB             : <xui>{dquote} {
     601 CBC        4289 :                     BEGIN(INITIAL);
     602 GIC          18 :                     ECHO;
     603 CBC          18 :                 }
     604 ECB             : <xd,xui>{xddouble}    {
     605 CBC          18 :                     ECHO;
     606              33 :                 }
     607                 : <xd,xui>{xdinside}    {
     608              33 :                     ECHO;
     609            4340 :                 }
     610 ECB             : 
     611 GIC        4340 : {xufailed}  {
     612 LBC           0 :                     /* throw back all but the initial u/U */
     613 ECB             :                     yyless(1);
     614 LBC           0 :                     ECHO;
     615 UIC           0 :                 }
     616 ECB             : 
     617 LBC           0 : {typecast}      {
     618 CBC       21950 :                     ECHO;
     619 GIC       21950 :                 }
     620 ECB             : 
     621 CBC       21950 : {dot_dot}       {
     622 UIC           0 :                     ECHO;
     623 LBC           0 :                 }
     624 ECB             : 
     625 UIC           0 : {colon_equals}  {
     626 CBC        1512 :                     ECHO;
     627 GBC        1512 :                 }
     628                 : 
     629            1512 : {equals_greater} {
     630             316 :                     ECHO;
     631 GIC         316 :                 }
     632 EUB             : 
     633 CBC         316 : {less_equals}   {
     634            1117 :                     ECHO;
     635 GIC        1117 :                 }
     636 ECB             : 
     637 GBC        1117 : {greater_equals} {
     638             959 :                     ECHO;
     639 GIC         959 :                 }
     640 EUB             : 
     641 CBC         959 : {less_greater}  {
     642             535 :                     ECHO;
     643 GIC         535 :                 }
     644 ECB             : 
     645 CBC         535 : {not_equals}    {
     646            1037 :                     ECHO;
     647 GIC        1037 :                 }
     648 ECB             : 
     649 CBC        1037 :     /*
     650 ECB             :      * These rules are specific to psql --- they implement parenthesis
     651                 :      * counting and detection of command-ending semicolon.  These must
     652                 :      * appear before the {self} rule so that they take precedence over it.
     653                 :      */
     654                 : 
     655 GIC      144470 : "("               {
     656 ECB             :                     cur_state->paren_depth++;
     657 CBC      144470 :                     ECHO;
     658          144470 :                 }
     659                 : 
     660          144470 : ")"               {
     661          144463 :                     if (cur_state->paren_depth > 0)
     662          144463 :                         cur_state->paren_depth--;
     663 GIC      144463 :                     ECHO;
     664 CBC      144463 :                 }
     665                 : 
     666 GIC      144463 : ";"               {
     667          142199 :                     ECHO;
     668          142199 :                     if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
     669          142199 :                     {
     670 ECB             :                         /* Terminate lexing temporarily */
     671                 :                         cur_state->start_state = YY_START;
     672 CBC      142127 :                         cur_state->identifier_count = 0;
     673          142127 :                         return LEXRES_SEMI;
     674 GIC      142127 :                     }
     675 ECB             :                 }
     676                 : 
     677 CBC          72 :     /*
     678 ECB             :      * psql-specific rules to handle backslash commands and variable
     679                 :      * substitution.  We want these before {self}, also.
     680                 :      */
     681                 : 
     682 CBC         377 : "\\"[;:]      {
     683 ECB             :                     /* Force a semi-colon or colon into the query buffer */
     684                 :                     psqlscan_emit(cur_state, yytext + 1, 1);
     685 GIC         377 :                     if (yytext[1] == ';')
     686             377 :                         cur_state->identifier_count = 0;
     687 CBC         377 :                 }
     688 ECB             : 
     689 CBC         377 : "\\"          {
     690 GIC        6648 :                     /* Terminate lexing temporarily */
     691                 :                     cur_state->start_state = YY_START;
     692 CBC        6648 :                     return LEXRES_BACKSLASH;
     693 GIC        6648 :                 }
     694                 : 
     695                 : :{variable_char}+   {
     696            1366 :                     /* Possible psql variable substitution */
     697 ECB             :                     char       *varname;
     698                 :                     char       *value;
     699                 : 
     700                 :                     varname = psqlscan_extract_substring(cur_state,
     701 CBC        1366 :                                                          yytext + 1,
     702            1366 :                                                          yyleng - 1);
     703 GIC        1366 :                     if (cur_state->callbacks->get_variable)
     704 CBC        1366 :                         value = cur_state->callbacks->get_variable(varname,
     705             770 :                                                                    PQUOTE_PLAIN,
     706                 :                                                                    cur_state->cb_passthrough);
     707 ECB             :                     else
     708                 :                         value = NULL;
     709 GIC         596 : 
     710                 :                     if (value)
     711 CBC        1366 :                     {
     712                 :                         /* It is a variable, check for recursion */
     713                 :                         if (psqlscan_var_is_current_source(cur_state, varname))
     714 GIC         566 :                         {
     715                 :                             /* Recursive expansion --- don't go there */
     716 ECB             :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     717 LBC           0 :                                                               varname);
     718 ECB             :                             /* Instead copy the string as is */
     719                 :                             ECHO;
     720 LBC           0 :                         }
     721                 :                         else
     722                 :                         {
     723                 :                             /* OK, perform substitution */
     724 ECB             :                             psqlscan_push_new_buffer(cur_state, value, varname);
     725 GIC         566 :                             /* yy_scan_string already made buffer active */
     726 ECB             :                         }
     727                 :                         free(value);
     728 GIC         566 :                     }
     729 ECB             :                     else
     730                 :                     {
     731                 :                         /*
     732 EUB             :                          * if the variable doesn't exist we'll copy the string
     733                 :                          * as is
     734                 :                          */
     735                 :                         ECHO;
     736 GIC         800 :                     }
     737                 : 
     738                 :                     free(varname);
     739            1366 :                 }
     740 ECB             : 
     741 GIC        1366 : :'{variable_char}+' {
     742             347 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     743 CBC         347 :                                              PQUOTE_SQL_LITERAL);
     744                 :                 }
     745                 : 
     746 GIC         347 : :\"{variable_char}+\" {
     747               6 :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     748               6 :                                              PQUOTE_SQL_IDENT);
     749                 :                 }
     750                 : 
     751 CBC           6 : :\{\?{variable_char}+\} {
     752 GIC           6 :                     psqlscan_test_variable(cur_state, yytext, yyleng);
     753               6 :                 }
     754 ECB             : 
     755 GIC           6 :     /*
     756 ECB             :      * These rules just avoid the need for scanner backup if one of the
     757                 :      * three rules above fails to match completely.
     758                 :      */
     759                 : 
     760 UIC           0 : :'{variable_char}*  {
     761 ECB             :                     /* Throw back everything but the colon */
     762                 :                     yyless(1);
     763 LBC           0 :                     ECHO;
     764 UIC           0 :                 }
     765                 : 
     766 LBC           0 : :\"{variable_char}*    {
     767               0 :                     /* Throw back everything but the colon */
     768 ECB             :                     yyless(1);
     769 UIC           0 :                     ECHO;
     770 LBC           0 :                 }
     771                 : 
     772 UIC           0 : :\{\?{variable_char}*   {
     773               0 :                     /* Throw back everything but the colon */
     774                 :                     yyless(1);
     775 UBC           0 :                     ECHO;
     776 UIC           0 :                 }
     777                 : :\{ {
     778 UBC           0 :                     /* Throw back everything but the colon */
     779               0 :                     yyless(1);
     780 UIC           0 :                     ECHO;
     781 UBC           0 :                 }
     782 EUB             : 
     783 UIC           0 :     /*
     784 EUB             :      * Back to backend-compatible rules.
     785                 :      */
     786                 : 
     787 GBC      255148 : {self}          {
     788 EUB             :                     ECHO;
     789 GIC      255148 :                 }
     790 EUB             : 
     791 GBC      255148 : {operator}      {
     792 GIC        9094 :                     /*
     793 EUB             :                      * Check for embedded slash-star or dash-dash; those
     794                 :                      * are comment starts, so operator must stop there.
     795                 :                      * Note that slash-star or dash-dash at the first
     796                 :                      * character will match a prior rule, not this one.
     797                 :                      */
     798                 :                     int         nchars = yyleng;
     799 GIC        9094 :                     char       *slashstar = strstr(yytext, "/*");
     800            9094 :                     char       *dashdash = strstr(yytext, "--");
     801            9094 : 
     802 ECB             :                     if (slashstar && dashdash)
     803 GIC        9094 :                     {
     804 ECB             :                         /* if both appear, take the first one */
     805                 :                         if (slashstar > dashdash)
     806 LBC           0 :                             slashstar = dashdash;
     807               0 :                     }
     808                 :                     else if (!slashstar)
     809 GIC        9094 :                         slashstar = dashdash;
     810            9064 :                     if (slashstar)
     811            9094 :                         nchars = slashstar - yytext;
     812              36 : 
     813                 :                     /*
     814 ECB             :                      * For SQL compatibility, '+' and '-' cannot be the
     815                 :                      * last char of a multi-char operator unless the operator
     816                 :                      * contains chars that are not in SQL operators.
     817                 :                      * The idea is to lex '=-' as two operators, but not
     818                 :                      * to forbid operator names like '?-' that could not be
     819                 :                      * sequences of SQL operators.
     820                 :                      */
     821 EUB             :                     if (nchars > 1 &&
     822 GBC        9094 :                         (yytext[nchars - 1] == '+' ||
     823 GIC        8253 :                          yytext[nchars - 1] == '-'))
     824 CBC        8250 :                     {
     825 ECB             :                         int         ic;
     826                 : 
     827                 :                         for (ic = nchars - 2; ic >= 0; ic--)
     828 GIC         273 :                         {
     829                 :                             char c = yytext[ic];
     830             234 :                             if (c == '~' || c == '!' || c == '@' ||
     831             234 :                                 c == '#' || c == '^' || c == '&' ||
     832             192 :                                 c == '|' || c == '`' || c == '?' ||
     833              75 :                                 c == '%')
     834                 :                                 break;
     835                 :                         }
     836                 :                         if (ic < 0)
     837 CBC         207 :                         {
     838 ECB             :                             /*
     839                 :                              * didn't find a qualifying character, so remove
     840                 :                              * all trailing [+-]
     841                 :                              */
     842                 :                             do {
     843                 :                                 nchars--;
     844 GIC          39 :                             } while (nchars > 1 &&
     845 CBC          39 :                                  (yytext[nchars - 1] == '+' ||
     846              18 :                                   yytext[nchars - 1] == '-'));
     847              18 :                         }
     848 ECB             :                     }
     849                 : 
     850                 :                     if (nchars < yyleng)
     851 GIC        9094 :                     {
     852 ECB             :                         /* Strip the unwanted chars from the token */
     853                 :                         yyless(nchars);
     854 GIC          75 :                     }
     855                 :                     ECHO;
     856            9094 :                 }
     857                 : 
     858            9094 : {param}         {
     859 CBC         227 :                     ECHO;
     860             227 :                 }
     861 ECB             : {param_junk}    {
     862 CBC         227 :                     ECHO;
     863 GIC           3 :                 }
     864                 : 
     865 GNC           3 : {decinteger}    {
     866 CBC       84962 :                     ECHO;
     867 GIC       84962 :                 }
     868                 : {hexinteger}    {
     869 CBC       84962 :                     ECHO;
     870 GIC          30 :                 }
     871                 : {octinteger}    {
     872 GNC          30 :                     ECHO;
     873              30 :                 }
     874                 : {bininteger}    {
     875              30 :                     ECHO;
     876              30 :                 }
     877                 : {hexfail}       {
     878              30 :                     ECHO;
     879               3 :                 }
     880                 : {octfail}       {
     881               3 :                     ECHO;
     882               3 :                 }
     883                 : {binfail}       {
     884               3 :                     ECHO;
     885               3 :                 }
     886                 : {numeric}       {
     887               3 :                     ECHO;
     888            3553 :                 }
     889                 : {numericfail}   {
     890 GIC        3553 :                     /* throw back the .., and treat as integer */
     891 LBC           0 :                     yyless(yyleng - 2);
     892               0 :                     ECHO;
     893               0 :                 }
     894                 : {real}          {
     895               0 :                     ECHO;
     896 CBC         159 :                 }
     897                 : {realfail}      {
     898             159 :                     ECHO;
     899               3 :                 }
     900                 : {decinteger_junk}   {
     901 GIC           3 :                     ECHO;
     902 CBC          21 :                 }
     903                 : {hexinteger_junk}   {
     904 GNC          21 :                     ECHO;
     905               6 :                 }
     906                 : {octinteger_junk}   {
     907               6 :                     ECHO;
     908               3 :                 }
     909                 : {bininteger_junk}   {
     910               3 :                     ECHO;
     911               3 :                 }
     912                 : {numeric_junk}  {
     913 GIC           3 :                     ECHO;
     914 CBC          21 :                 }
     915 ECB             : {real_junk}     {
     916 GIC          21 :                     ECHO;
     917 CBC           3 :                 }
     918 ECB             : 
     919 GIC           3 : 
     920 CBC     1043759 : {identifier}    {
     921 ECB             :                     /*
     922                 :                      * We need to track if we are inside a BEGIN .. END block
     923                 :                      * in a function definition, so that semicolons contained
     924                 :                      * therein don't terminate the whole statement.  Short of
     925                 :                      * writing a full parser here, the following heuristic
     926                 :                      * should work.  First, we track whether the beginning of
     927                 :                      * the statement matches CREATE [OR REPLACE]
     928                 :                      * {FUNCTION|PROCEDURE}
     929                 :                      */
     930                 : 
     931                 :                     if (cur_state->identifier_count == 0)
     932 CBC     1043759 :                         memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
     933 GBC      146435 : 
     934 EUB             :                     if (pg_strcasecmp(yytext, "create") == 0 ||
     935 GBC     2061423 :                         pg_strcasecmp(yytext, "function") == 0 ||
     936 GIC     2030476 :                         pg_strcasecmp(yytext, "procedure") == 0 ||
     937 GBC     2024046 :                         pg_strcasecmp(yytext, "or") == 0 ||
     938 CBC     2019705 :                         pg_strcasecmp(yytext, "replace") == 0)
     939 GIC     1008471 :                     {
     940 ECB             :                         if (cur_state->identifier_count < sizeof(cur_state->identifiers))
     941 CBC       36281 :                             cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
     942 GIC       32715 :                     }
     943 ECB             : 
     944                 :                     cur_state->identifier_count++;
     945 GIC     1043759 : 
     946 ECB             :                     if (cur_state->identifiers[0] == 'c' &&
     947 CBC     1043759 :                         (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
     948 GIC      258385 :                          (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
     949 CBC      233929 :                           (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
     950           10368 :                         cur_state->paren_depth == 0)
     951 GIC       31020 :                     {
     952 ECB             :                         if (pg_strcasecmp(yytext, "begin") == 0)
     953 CBC       25061 :                             cur_state->begin_depth++;
     954 GIC          36 :                         else if (pg_strcasecmp(yytext, "case") == 0)
     955 CBC       25025 :                         {
     956 ECB             :                             /*
     957                 :                              * CASE also ends with END.  We only need to track
     958                 :                              * this if we are already inside a BEGIN.
     959                 :                              */
     960                 :                             if (cur_state->begin_depth >= 1)
     961 CBC           3 :                                 cur_state->begin_depth++;
     962               3 :                         }
     963                 :                         else if (pg_strcasecmp(yytext, "end") == 0)
     964 GIC       25022 :                         {
     965                 :                             if (cur_state->begin_depth > 0)
     966              39 :                                 cur_state->begin_depth--;
     967              39 :                         }
     968                 :                     }
     969                 : 
     970                 :                     ECHO;
     971         1043759 :                 }
     972                 : 
     973         1043759 : {other}         {
     974 LBC           0 :                     ECHO;
     975               0 :                 }
     976                 : 
     977               0 : <<EOF>>         {
     978 CBC      276973 :                     if (cur_state->buffer_stack == NULL)
     979          276973 :                     {
     980 ECB             :                         cur_state->start_state = YY_START;
     981 CBC      276407 :                         return LEXRES_EOL;      /* end of input reached */
     982 GIC      276407 :                     }
     983 ECB             : 
     984                 :                     /*
     985                 :                      * We were expanding a variable, so pop the inclusion
     986                 :                      * stack and keep lexing
     987                 :                      */
     988                 :                     psqlscan_pop_buffer_stack(cur_state);
     989 CBC         566 :                     psqlscan_select_top_buffer(cur_state);
     990             566 :                 }
     991 ECB             : 
     992 CBC         566 : %%
     993 LBC           0 : 
     994                 : /* LCOV_EXCL_STOP */
     995 ECB             : 
     996                 : /*
     997                 :  * Create a lexer working state struct.
     998                 :  *
     999                 :  * callbacks is a struct of function pointers that encapsulate some
    1000                 :  * behavior we need from the surrounding program.  This struct must
    1001                 :  * remain valid for the lifespan of the PsqlScanState.
    1002                 :  */
    1003                 : PsqlScanState
    1004                 : psql_scan_create(const PsqlScanCallbacks *callbacks)
    1005 GIC        6233 : {
    1006 ECB             :     PsqlScanState state;
    1007                 : 
    1008                 :     state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
    1009 CBC        6233 : 
    1010                 :     state->callbacks = callbacks;
    1011 GIC        6233 : 
    1012                 :     yylex_init(&state->scanner);
    1013 CBC        6233 : 
    1014                 :     yyset_extra(state, state->scanner);
    1015            6233 : 
    1016 EUB             :     psql_scan_reset(state);
    1017 GBC        6233 : 
    1018                 :     return state;
    1019            6233 : }
    1020 ECB             : 
    1021                 : /*
    1022                 :  * Destroy a lexer working state struct, releasing all resources.
    1023                 :  */
    1024                 : void
    1025                 : psql_scan_destroy(PsqlScanState state)
    1026 GIC        6186 : {
    1027                 :     psql_scan_finish(state);
    1028            6186 : 
    1029                 :     psql_scan_reset(state);
    1030            6186 : 
    1031 ECB             :     yylex_destroy(state->scanner);
    1032 CBC        6186 : 
    1033                 :     free(state);
    1034            6186 : }
    1035 GBC        6186 : 
    1036                 : /*
    1037                 :  * Set the callback passthrough pointer for the lexer.
    1038                 :  *
    1039                 :  * This could have been integrated into psql_scan_create, but keeping it
    1040                 :  * separate allows the application to change the pointer later, which might
    1041                 :  * be useful.
    1042                 :  */
    1043                 : void
    1044                 : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
    1045 GIC        5971 : {
    1046                 :     state->cb_passthrough = passthrough;
    1047 CBC        5971 : }
    1048 GIC        5971 : 
    1049                 : /*
    1050                 :  * Set up to perform lexing of the given input line.
    1051 ECB             :  *
    1052                 :  * The text at *line, extending for line_len bytes, will be scanned by
    1053                 :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1054                 :  * be called when scanning is complete.  Note that the lexer retains
    1055                 :  * a pointer to the storage at *line --- this string must not be altered
    1056                 :  * or freed until after psql_scan_finish is called.
    1057                 :  *
    1058                 :  * encoding is the libpq identifier for the character encoding in use,
    1059                 :  * and std_strings says whether standard_conforming_strings is on.
    1060                 :  */
    1061                 : void
    1062                 : psql_scan_setup(PsqlScanState state,
    1063 GIC      276494 :                 const char *line, int line_len,
    1064                 :                 int encoding, bool std_strings)
    1065                 : {
    1066                 :     /* Mustn't be scanning already */
    1067                 :     Assert(state->scanbufhandle == NULL);
    1068 CBC      276494 :     Assert(state->buffer_stack == NULL);
    1069 GIC      276494 : 
    1070 ECB             :     /* Do we need to hack the character set encoding? */
    1071                 :     state->encoding = encoding;
    1072 CBC      276494 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
    1073 GIC      276494 : 
    1074 ECB             :     /* Save standard-strings flag as well */
    1075                 :     state->std_strings = std_strings;
    1076 CBC      276494 : 
    1077 ECB             :     /* Set up flex input buffer with appropriate translation and padding */
    1078                 :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1079 GIC      276494 :                                                    &state->scanbuf);
    1080                 :     state->scanline = line;
    1081          276494 : 
    1082                 :     /* Set lookaside data in case we have to map unsafe encoding */
    1083                 :     state->curline = state->scanbuf;
    1084          276494 :     state->refline = state->scanline;
    1085          276494 : }
    1086          276494 : 
    1087 ECB             : /*
    1088                 :  * Do lexical analysis of SQL command text.
    1089                 :  *
    1090                 :  * The text previously passed to psql_scan_setup is scanned, and appended
    1091                 :  * (possibly with transformation) to query_buf.
    1092                 :  *
    1093                 :  * The return value indicates the condition that stopped scanning:
    1094                 :  *
    1095                 :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1096                 :  * transferred to query_buf.)  The command accumulated in query_buf should
    1097                 :  * be executed, then clear query_buf and call again to scan the remainder
    1098                 :  * of the line.
    1099                 :  *
    1100                 :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1101                 :  * Any previous data on the line has been transferred to query_buf.
    1102                 :  * The caller will typically next apply a separate flex lexer to scan
    1103                 :  * the special command.
    1104                 :  *
    1105                 :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1106                 :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1107                 :  *
    1108                 :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1109                 :  * reason to consider the command incomplete.  The caller may or may not
    1110                 :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1111                 :  * the caller chooses to collect more input.
    1112                 :  *
    1113                 :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1114                 :  * be called next, then the cycle may be repeated with a fresh input line.
    1115                 :  *
    1116                 :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1117                 :  * next line-input operation.
    1118                 :  */
    1119                 : PsqlScanResult
    1120                 : psql_scan(PsqlScanState state,
    1121 CBC      425182 :           PQExpBuffer query_buf,
    1122                 :           promptStatus_t *prompt)
    1123 ECB             : {
    1124                 :     PsqlScanResult result;
    1125                 :     int         lexresult;
    1126                 : 
    1127                 :     /* Must be scanning already */
    1128                 :     Assert(state->scanbufhandle != NULL);
    1129 GIC      425182 : 
    1130                 :     /* Set current output target */
    1131                 :     state->output_buf = query_buf;
    1132          425182 : 
    1133                 :     /* Set input source */
    1134                 :     if (state->buffer_stack != NULL)
    1135          425182 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1136              45 :     else
    1137                 :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1138          425137 : 
    1139                 :     /* And lex. */
    1140                 :     lexresult = yylex(NULL, state->scanner);
    1141          425182 : 
    1142                 :     /*
    1143                 :      * Check termination state and return appropriate result info.
    1144                 :      */
    1145                 :     switch (lexresult)
    1146          425182 :     {
    1147                 :         case LEXRES_EOL:        /* end of input */
    1148          276407 :             switch (state->start_state)
    1149          276407 :             {
    1150                 :                 case INITIAL:
    1151          254483 :                 case xqs:       /* we treat this like INITIAL */
    1152                 :                     if (state->paren_depth > 0)
    1153          254483 :                     {
    1154                 :                         result = PSCAN_INCOMPLETE;
    1155           19146 :                         *prompt = PROMPT_PAREN;
    1156           19146 :                     }
    1157                 :                     else if (state->begin_depth > 0)
    1158          235337 :                     {
    1159                 :                         result = PSCAN_INCOMPLETE;
    1160              78 :                         *prompt = PROMPT_CONTINUE;
    1161              78 :                     }
    1162                 :                     else if (query_buf->len > 0)
    1163 CBC      235259 :                     {
    1164                 :                         result = PSCAN_EOL;
    1165 GIC       44435 :                         *prompt = PROMPT_CONTINUE;
    1166           44435 :                     }
    1167                 :                     else
    1168                 :                     {
    1169                 :                         /* never bother to send an empty buffer */
    1170                 :                         result = PSCAN_INCOMPLETE;
    1171 CBC      190824 :                         *prompt = PROMPT_READY;
    1172 GIC      190824 :                     }
    1173                 :                     break;
    1174 CBC      254483 :                 case xb:
    1175 UIC           0 :                     result = PSCAN_INCOMPLETE;
    1176               0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1177 LBC           0 :                     break;
    1178               0 :                 case xc:
    1179 GIC         367 :                     result = PSCAN_INCOMPLETE;
    1180 CBC         367 :                     *prompt = PROMPT_COMMENT;
    1181 GIC         367 :                     break;
    1182             367 :                 case xd:
    1183 CBC           9 :                     result = PSCAN_INCOMPLETE;
    1184 GIC           9 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1185               9 :                     break;
    1186               9 :                 case xh:
    1187 UIC           0 :                     result = PSCAN_INCOMPLETE;
    1188 LBC           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1189 UIC           0 :                     break;
    1190 LBC           0 :                 case xe:
    1191 CBC         301 :                     result = PSCAN_INCOMPLETE;
    1192 GIC         301 :                     *prompt = PROMPT_SINGLEQUOTE;
    1193 CBC         301 :                     break;
    1194 GIC         301 :                 case xq:
    1195 CBC        4661 :                     result = PSCAN_INCOMPLETE;
    1196 GIC        4661 :                     *prompt = PROMPT_SINGLEQUOTE;
    1197 CBC        4661 :                     break;
    1198            4661 :                 case xdolq:
    1199 GIC       16586 :                     result = PSCAN_INCOMPLETE;
    1200 CBC       16586 :                     *prompt = PROMPT_DOLLARQUOTE;
    1201 GIC       16586 :                     break;
    1202 CBC       16586 :                 case xui:
    1203 LBC           0 :                     result = PSCAN_INCOMPLETE;
    1204 UIC           0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1205 LBC           0 :                     break;
    1206 UIC           0 :                 case xus:
    1207 LBC           0 :                     result = PSCAN_INCOMPLETE;
    1208               0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1209 UIC           0 :                     break;
    1210               0 :                 default:
    1211               0 :                     /* can't get here */
    1212                 :                     fprintf(stderr, "invalid YY_START\n");
    1213 LBC           0 :                     exit(1);
    1214               0 :             }
    1215                 :             break;
    1216 CBC      276407 :         case LEXRES_SEMI:       /* semicolon */
    1217 GBC      142127 :             result = PSCAN_SEMICOLON;
    1218          142127 :             *prompt = PROMPT_READY;
    1219          142127 :             break;
    1220          142127 :         case LEXRES_BACKSLASH:  /* backslash */
    1221 CBC        6648 :             result = PSCAN_BACKSLASH;
    1222            6648 :             *prompt = PROMPT_READY;
    1223            6648 :             break;
    1224            6648 :         default:
    1225 LBC           0 :             /* can't get here */
    1226 ECB             :             fprintf(stderr, "invalid yylex result\n");
    1227 LBC           0 :             exit(1);
    1228               0 :     }
    1229 EUB             : 
    1230                 :     return result;
    1231 GBC      425182 : }
    1232 EUB             : 
    1233 ECB             : /*
    1234                 :  * Clean up after scanning a string.  This flushes any unread input and
    1235                 :  * releases resources (but not the PsqlScanState itself).  Note however
    1236                 :  * that this does not reset the lexer scan state; that can be done by
    1237                 :  * psql_scan_reset(), which is an orthogonal operation.
    1238                 :  *
    1239                 :  * It is legal to call this when not scanning anything (makes it easier
    1240                 :  * to deal with error recovery).
    1241                 :  */
    1242                 : void
    1243                 : psql_scan_finish(PsqlScanState state)
    1244 CBC      282631 : {
    1245 EUB             :     /* Drop any incomplete variable expansions. */
    1246                 :     while (state->buffer_stack != NULL)
    1247 GBC      282631 :         psqlscan_pop_buffer_stack(state);
    1248 UBC           0 : 
    1249 EUB             :     /* Done with the outer scan buffer, too */
    1250                 :     if (state->scanbufhandle)
    1251 GBC      282631 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1252          276447 :     state->scanbufhandle = NULL;
    1253          282631 :     if (state->scanbuf)
    1254 GIC      282631 :         free(state->scanbuf);
    1255 GBC      276447 :     state->scanbuf = NULL;
    1256          282631 : }
    1257 GIC      282631 : 
    1258 ECB             : /*
    1259                 :  * Reset lexer scanning state to start conditions.  This is appropriate
    1260                 :  * for executing \r psql commands (or any other time that we discard the
    1261                 :  * prior contents of query_buf).  It is not, however, necessary to do this
    1262                 :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1263                 :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1264                 :  * conditions are returned.
    1265                 :  *
    1266                 :  * Note that this is unrelated to flushing unread input; that task is
    1267 EUB             :  * done by psql_scan_finish().
    1268                 :  */
    1269                 : void
    1270                 : psql_scan_reset(PsqlScanState state)
    1271 GIC       12953 : {
    1272                 :     state->start_state = INITIAL;
    1273 CBC       12953 :     state->paren_depth = 0;
    1274 GIC       12953 :     state->xcdepth = 0;          /* not really necessary */
    1275           12953 :     if (state->dolqstart)
    1276           12953 :         free(state->dolqstart);
    1277 UIC           0 :     state->dolqstart = NULL;
    1278 GIC       12953 :     state->identifier_count = 0;
    1279           12953 :     state->begin_depth = 0;
    1280           12953 : }
    1281           12953 : 
    1282                 : /*
    1283                 :  * Reselect this lexer (psqlscan.l) after using another one.
    1284                 :  *
    1285                 :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1286 ECB             :  * state, because we'd never switch to another lexer in a different state.
    1287                 :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1288                 :  * the same as psql_scan_reset().
    1289                 :  *
    1290 EUB             :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1291                 :  * must be a superset of this.
    1292                 :  *
    1293 ECB             :  * Note: it seems likely that other lexers could just assign INITIAL for
    1294                 :  * themselves, since that probably has the value zero in every flex-generated
    1295                 :  * lexer.  But let's not assume that.
    1296                 :  */
    1297                 : void
    1298                 : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1299 CBC       29956 : {
    1300                 :     state->start_state = INITIAL;
    1301 GIC       29956 : }
    1302           29956 : 
    1303                 : /*
    1304                 :  * Return true if lexer is currently in an "inside quotes" state.
    1305                 :  *
    1306                 :  * This is pretty grotty but is needed to preserve the old behavior
    1307                 :  * that mainloop.c drops blank lines not inside quotes without even
    1308                 :  * echoing them.
    1309                 :  */
    1310                 : bool
    1311                 : psql_scan_in_quote(PsqlScanState state)
    1312           60029 : {
    1313 ECB             :     return state->start_state != INITIAL &&
    1314 GIC       60483 :             state->start_state != xqs;
    1315 CBC         454 : }
    1316 ECB             : 
    1317                 : /*
    1318                 :  * Push the given string onto the stack of stuff to scan.
    1319 EUB             :  *
    1320 ECB             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1321                 :  */
    1322                 : void
    1323                 : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1324 GIC         566 :                          const char *varname)
    1325                 : {
    1326                 :     StackElem  *stackelem;
    1327                 : 
    1328                 :     stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
    1329             566 : 
    1330                 :     /*
    1331                 :      * In current usage, the passed varname points at the current flex input
    1332                 :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1333                 :      * because that will change the buffer state.
    1334                 :      */
    1335                 :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1336             566 : 
    1337                 :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1338             566 :                                              &stackelem->bufstring);
    1339                 :     state->curline = stackelem->bufstring;
    1340             566 :     if (state->safe_encoding)
    1341 CBC         566 :     {
    1342                 :         stackelem->origstring = NULL;
    1343             566 :         state->refline = stackelem->bufstring;
    1344             566 :     }
    1345                 :     else
    1346                 :     {
    1347                 :         stackelem->origstring = pg_strdup(newstr);
    1348 UIC           0 :         state->refline = stackelem->origstring;
    1349               0 :     }
    1350                 :     stackelem->next = state->buffer_stack;
    1351 GIC         566 :     state->buffer_stack = stackelem;
    1352             566 : }
    1353             566 : 
    1354 ECB             : /*
    1355                 :  * Pop the topmost buffer stack item (there must be one!)
    1356                 :  *
    1357                 :  * NB: after this, the flex input state is unspecified; caller must
    1358                 :  * switch to an appropriate buffer to continue lexing.
    1359                 :  * See psqlscan_select_top_buffer().
    1360                 :  */
    1361                 : void
    1362                 : psqlscan_pop_buffer_stack(PsqlScanState state)
    1363 GIC         566 : {
    1364                 :     StackElem  *stackelem = state->buffer_stack;
    1365             566 : 
    1366 ECB             :     state->buffer_stack = stackelem->next;
    1367 GIC         566 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1368             566 :     free(stackelem->bufstring);
    1369             566 :     if (stackelem->origstring)
    1370             566 :         free(stackelem->origstring);
    1371 LBC           0 :     if (stackelem->varname)
    1372 GIC         566 :         free(stackelem->varname);
    1373             566 :     free(stackelem);
    1374             566 : }
    1375             566 : 
    1376                 : /*
    1377                 :  * Select the topmost surviving buffer as the active input.
    1378 ECB             :  */
    1379                 : void
    1380                 : psqlscan_select_top_buffer(PsqlScanState state)
    1381 GIC         566 : {
    1382 ECB             :     StackElem  *stackelem = state->buffer_stack;
    1383 CBC         566 : 
    1384                 :     if (stackelem != NULL)
    1385             566 :     {
    1386 ECB             :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1387 UIC           0 :         state->curline = stackelem->bufstring;
    1388               0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1389               0 :     }
    1390 EUB             :     else
    1391                 :     {
    1392                 :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1393 CBC         566 :         state->curline = state->scanbuf;
    1394             566 :         state->refline = state->scanline;
    1395             566 :     }
    1396                 : }
    1397 GIC         566 : 
    1398                 : /*
    1399                 :  * Check if specified variable name is the source for any string
    1400                 :  * currently being scanned
    1401                 :  */
    1402                 : bool
    1403                 : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1404             566 : {
    1405 ECB             :     StackElem  *stackelem;
    1406                 : 
    1407                 :     for (stackelem = state->buffer_stack;
    1408 GIC         566 :          stackelem != NULL;
    1409 CBC         566 :          stackelem = stackelem->next)
    1410 LBC           0 :     {
    1411 ECB             :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1412 LBC           0 :             return true;
    1413 UBC           0 :     }
    1414 ECB             :     return false;
    1415 CBC         566 : }
    1416 ECB             : 
    1417                 : /*
    1418                 :  * Set up a flex input buffer to scan the given data.  We always make a
    1419                 :  * copy of the data.  If working in an unsafe encoding, the copy has
    1420                 :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1421                 :  *
    1422                 :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1423                 :  */
    1424                 : YY_BUFFER_STATE
    1425                 : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1426 GIC      277060 :                         char **txtcopy)
    1427 ECB             : {
    1428                 :     char       *newtxt;
    1429 EUB             : 
    1430                 :     /* Flex wants two \0 characters after the actual data */
    1431                 :     newtxt = pg_malloc(len + 2);
    1432 GIC      277060 :     *txtcopy = newtxt;
    1433          277060 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1434          277060 : 
    1435 ECB             :     if (state->safe_encoding)
    1436 CBC      277060 :         memcpy(newtxt, txt, len);
    1437          277060 :     else
    1438                 :     {
    1439 ECB             :         /* Gotta do it the hard way */
    1440                 :         int         i = 0;
    1441 UIC           0 : 
    1442                 :         while (i < len)
    1443               0 :         {
    1444                 :             int         thislen = PQmblen(txt + i, state->encoding);
    1445               0 : 
    1446 ECB             :             /* first byte should always be okay... */
    1447                 :             newtxt[i] = txt[i];
    1448 UIC           0 :             i++;
    1449               0 :             while (--thislen > 0 && i < len)
    1450 LBC           0 :                 newtxt[i++] = (char) 0xFF;
    1451               0 :         }
    1452 EUB             :     }
    1453                 : 
    1454                 :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1455 GBC      277060 : }
    1456                 : 
    1457 ECB             : /*
    1458                 :  * psqlscan_emit() --- body for ECHO macro
    1459                 :  *
    1460                 :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1461                 :  * input data.  If you pass it something that is not part of the yytext
    1462                 :  * string, you are making a mistake.  Internally generated text can be
    1463                 :  * appended directly to state->output_buf.
    1464                 :  */
    1465                 : void
    1466                 : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1467 GIC     3281909 : {
    1468 ECB             :     PQExpBuffer output_buf = state->output_buf;
    1469 GIC     3281909 : 
    1470                 :     if (state->safe_encoding)
    1471         3281909 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1472         3281909 :     else
    1473                 :     {
    1474 ECB             :         /* Gotta do it the hard way */
    1475                 :         const char *reference = state->refline;
    1476 LBC           0 :         int         i;
    1477                 : 
    1478 ECB             :         reference += (txt - state->curline);
    1479 LBC           0 : 
    1480                 :         for (i = 0; i < len; i++)
    1481 UIC           0 :         {
    1482                 :             char        ch = txt[i];
    1483 UBC           0 : 
    1484                 :             if (ch == (char) 0xFF)
    1485               0 :                 ch = reference[i];
    1486 UIC           0 :             appendPQExpBufferChar(output_buf, ch);
    1487 UBC           0 :         }
    1488                 :     }
    1489                 : }
    1490 GBC     3281909 : 
    1491 EUB             : /*
    1492                 :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1493                 :  *
    1494                 :  * This is like psqlscan_emit(), except that the data is returned as a
    1495                 :  * malloc'd string rather than being pushed directly to state->output_buf.
    1496                 :  */
    1497 ECB             : char *
    1498                 : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1499 GIC        2293 : {
    1500                 :     char       *result = (char *) pg_malloc(len + 1);
    1501            2293 : 
    1502                 :     if (state->safe_encoding)
    1503            2293 :         memcpy(result, txt, len);
    1504            2293 :     else
    1505                 :     {
    1506                 :         /* Gotta do it the hard way */
    1507                 :         const char *reference = state->refline;
    1508 UIC           0 :         int         i;
    1509 ECB             : 
    1510                 :         reference += (txt - state->curline);
    1511 LBC           0 : 
    1512                 :         for (i = 0; i < len; i++)
    1513               0 :         {
    1514 ECB             :             char        ch = txt[i];
    1515 UIC           0 : 
    1516                 :             if (ch == (char) 0xFF)
    1517               0 :                 ch = reference[i];
    1518 UBC           0 :             result[i] = ch;
    1519 UIC           0 :         }
    1520                 :     }
    1521 EUB             :     result[len] = '\0';
    1522 GIC        2293 :     return result;
    1523 GBC        2293 : }
    1524                 : 
    1525 EUB             : /*
    1526                 :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1527                 :  *
    1528                 :  * If the variable name is found, escape its value using the appropriate
    1529                 :  * quoting method and emit the value to output_buf.  (Since the result is
    1530                 :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1531                 :  * find the variable or escaping fails, emit the token as-is.
    1532 ECB             :  */
    1533                 : void
    1534                 : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1535 GIC         388 :                          PsqlScanQuoteType quote)
    1536                 : {
    1537                 :     char       *varname;
    1538                 :     char       *value;
    1539                 : 
    1540                 :     /* Variable lookup. */
    1541 ECB             :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1542 GIC         388 :     if (state->callbacks->get_variable)
    1543 CBC         388 :         value = state->callbacks->get_variable(varname, quote,
    1544 GIC         388 :                                                state->cb_passthrough);
    1545 ECB             :     else
    1546                 :         value = NULL;
    1547 UIC           0 :     free(varname);
    1548 GIC         388 : 
    1549                 :     if (value)
    1550 GBC         388 :     {
    1551                 :         /* Emit the suitably-escaped value */
    1552                 :         appendPQExpBufferStr(state->output_buf, value);
    1553             366 :         free(value);
    1554 GIC         366 :     }
    1555 EUB             :     else
    1556                 :     {
    1557                 :         /* Emit original token as-is */
    1558                 :         psqlscan_emit(state, txt, len);
    1559 GBC          22 :     }
    1560 EUB             : }
    1561 GBC         388 : 
    1562                 : void
    1563                 : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1564 CBC          12 : {
    1565 ECB             :     char    *varname;
    1566                 :     char    *value;
    1567                 : 
    1568                 :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1569 GIC          12 :     if (state->callbacks->get_variable)
    1570              12 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1571              12 :                                                state->cb_passthrough);
    1572                 :     else
    1573                 :         value = NULL;
    1574 UIC           0 :     free(varname);
    1575 GIC          12 : 
    1576                 :     if (value != NULL)
    1577 CBC          12 :     {
    1578                 :         psqlscan_emit(state, "TRUE", 4);
    1579 GIC           6 :         free(value);
    1580               6 :     }
    1581                 :     else
    1582                 :     {
    1583                 :         psqlscan_emit(state, "FALSE", 5);
    1584 CBC           6 :     }
    1585 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a