LCOV - Differential Code Coverage HEAD vs 15 - src/interfaces/ecpg/preproc/parser.c

LCOV - differential code coverage report

Current view:	top level - src/interfaces/ecpg/preproc - parser.c (source / functions)		Coverage	Total	Hit	LBC	UIC	GBC	GIC	GNC	CBC	EUB	ECB
Current:	Differential Code Coverage HEAD vs 15	Lines:	92.0 %	88	81	2	5	2	36	11	32	5	47
Current Date:	2023-04-08 15:15:32	Functions:	100.0 %	3	3				2	1			2
Baseline:	15
Baseline Date:	2023-04-08 15:09:40
Legend:	Lines: hit not hit

           TLA  Line data    Source code

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * parser.c
       4                 :  *      Main entry point/driver for PostgreSQL grammar
       5                 :  *
       6                 :  * This should match src/backend/parser/parser.c, except that we do not
       7                 :  * need to bother with re-entrant interfaces.
       8                 :  *
       9                 :  * Note: ECPG doesn't report error location like the backend does.
      10                 :  * This file will need work if we ever want it to.
      11                 :  *
      12                 :  *
      13                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      14                 :  * Portions Copyright (c) 1994, Regents of the University of California
      15                 :  *
      16                 :  * IDENTIFICATION
      17                 :  *    src/interfaces/ecpg/preproc/parser.c
      18                 :  *
      19                 :  *-------------------------------------------------------------------------
      20                 :  */
      21                 : 
      22                 : #include "postgres_fe.h"
      23                 : 
      24                 : #include "preproc_extern.h"
      25                 : #include "preproc.h"
      26                 : 
      27                 : 
      28                 : static bool have_lookahead;     /* is lookahead info valid? */
      29                 : static int  lookahead_token;    /* one-token lookahead */
      30                 : static YYSTYPE lookahead_yylval;    /* yylval for lookahead token */
      31                 : static YYLTYPE lookahead_yylloc;    /* yylloc for lookahead token */
      32                 : static char *lookahead_yytext;  /* start current token */
      33                 : 
      34                 : static bool check_uescapechar(unsigned char escape);
      35                 : static bool ecpg_isspace(char ch);
      36                 : 
      37                 : 
      38                 : /*
      39                 :  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
      40                 :  *
      41                 :  * This filter is needed because in some cases the standard SQL grammar
      42                 :  * requires more than one token lookahead.  We reduce these cases to one-token
      43                 :  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
      44                 :  *
      45                 :  * Using a filter is simpler than trying to recognize multiword tokens
      46                 :  * directly in scan.l, because we'd have to allow for comments between the
      47                 :  * words.  Furthermore it's not clear how to do that without re-introducing
      48                 :  * scanner backtrack, which would cost more performance than this filter
      49                 :  * layer does.
      50                 :  *
      51                 :  * We also use this filter to convert UIDENT and USCONST sequences into
      52                 :  * plain IDENT and SCONST tokens.  While that could be handled by additional
      53                 :  * productions in the main grammar, it's more efficient to do it like this.
      54                 :  */
      55                 : int
      56 CBC       34710 : filtered_base_yylex(void)
      57                 : {
      58                 :     int         cur_token;
      59                 :     int         next_token;
      60                 :     YYSTYPE     cur_yylval;
      61                 :     YYLTYPE     cur_yylloc;
      62                 :     char       *cur_yytext;
      63                 : 
      64                 :     /* Get next token --- we might already have it */
      65           34710 :     if (have_lookahead)
      66                 :     {
      67              51 :         cur_token = lookahead_token;
      68              51 :         base_yylval = lookahead_yylval;
      69              51 :         base_yylloc = lookahead_yylloc;
      70              51 :         base_yytext = lookahead_yytext;
      71              51 :         have_lookahead = false;
      72                 :     }
      73                 :     else
      74           34659 :         cur_token = base_yylex();
      75                 : 
      76                 :     /*
      77                 :      * If this token isn't one that requires lookahead, just return it.
      78                 :      */
      79           34710 :     switch (cur_token)
      80                 :     {
      81 GNC          52 :         case FORMAT:
      82 ECB             :         case NOT:
      83                 :         case NULLS_P:
      84                 :         case WITH:
      85                 :         case WITHOUT:
      86                 :         case UIDENT:
      87                 :         case USCONST:
      88 GIC          52 :             break;
      89           34658 :         default:
      90 CBC       34658 :             return cur_token;
      91 ECB             :     }
      92                 : 
      93                 :     /* Save and restore lexer output variables around the call */
      94 GIC          52 :     cur_yylval = base_yylval;
      95              52 :     cur_yylloc = base_yylloc;
      96 CBC          52 :     cur_yytext = base_yytext;
      97 ECB             : 
      98                 :     /* Get next token, saving outputs into lookahead variables */
      99 GIC          52 :     next_token = base_yylex();
     100                 : 
     101 CBC          52 :     lookahead_token = next_token;
     102 GIC          52 :     lookahead_yylval = base_yylval;
     103 CBC          52 :     lookahead_yylloc = base_yylloc;
     104              52 :     lookahead_yytext = base_yytext;
     105 ECB             : 
     106 CBC          52 :     base_yylval = cur_yylval;
     107 GIC          52 :     base_yylloc = cur_yylloc;
     108 CBC          52 :     base_yytext = cur_yytext;
     109 ECB             : 
     110 CBC          52 :     have_lookahead = true;
     111                 : 
     112 ECB             :     /* Replace cur_token if needed, based on lookahead */
     113 GIC          52 :     switch (cur_token)
     114                 :     {
     115 GNC           2 :         case FORMAT:
     116                 :             /* Replace FORMAT by FORMAT_LA if it's followed by JSON */
     117                 :             switch (next_token)
     118                 :             {
     119               2 :                 case JSON:
     120               2 :                     cur_token = FORMAT_LA;
     121               2 :                     break;
     122                 :             }
     123               2 :             break;
     124                 : 
     125 CBC          35 :         case NOT:
     126                 :             /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
     127 ECB             :             switch (next_token)
     128                 :             {
     129 UIC           0 :                 case BETWEEN:
     130                 :                 case IN_P:
     131 ECB             :                 case LIKE:
     132                 :                 case ILIKE:
     133                 :                 case SIMILAR:
     134 UIC           0 :                     cur_token = NOT_LA;
     135 LBC           0 :                     break;
     136                 :             }
     137 CBC          35 :             break;
     138                 : 
     139 GIC           2 :         case NULLS_P:
     140                 :             /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
     141 EUB             :             switch (next_token)
     142                 :             {
     143 GIC           2 :                 case FIRST_P:
     144                 :                 case LAST_P:
     145               2 :                     cur_token = NULLS_LA;
     146 GBC           2 :                     break;
     147 EUB             :             }
     148 GIC           2 :             break;
     149 ECB             : 
     150 GIC           6 :         case WITH:
     151 ECB             :             /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
     152                 :             switch (next_token)
     153                 :             {
     154 GIC           1 :                 case TIME:
     155 ECB             :                 case ORDINALITY:
     156 GIC           1 :                     cur_token = WITH_LA;
     157 CBC           1 :                     break;
     158 ECB             :             }
     159 GIC           6 :             break;
     160                 : 
     161 GNC           4 :         case WITHOUT:
     162                 :             /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
     163                 :             switch (next_token)
     164                 :             {
     165               1 :                 case TIME:
     166               1 :                     cur_token = WITHOUT_LA;
     167               1 :                     break;
     168                 :             }
     169               4 :             break;
     170 CBC           3 :         case UIDENT:
     171                 :         case USCONST:
     172 ECB             :             /* Look ahead for UESCAPE */
     173 GIC           3 :             if (next_token == UESCAPE)
     174                 :             {
     175                 :                 /* Yup, so get third token, which had better be SCONST */
     176 ECB             :                 const char *escstr;
     177                 : 
     178                 :                 /*
     179                 :                  * Again save and restore lexer output variables around the
     180                 :                  * call
     181                 :                  */
     182 GIC           1 :                 cur_yylval = base_yylval;
     183 CBC           1 :                 cur_yylloc = base_yylloc;
     184 GIC           1 :                 cur_yytext = base_yytext;
     185                 : 
     186                 :                 /* Get third token */
     187 CBC           1 :                 next_token = base_yylex();
     188 ECB             : 
     189 CBC           1 :                 if (next_token != SCONST)
     190 UIC           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
     191 ECB             : 
     192                 :                 /*
     193                 :                  * Save and check escape string, which the scanner returns
     194                 :                  * with quotes
     195                 :                  */
     196 GIC           1 :                 escstr = base_yylval.str;
     197               1 :                 if (strlen(escstr) != 3 || !check_uescapechar(escstr[1]))
     198 UIC           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character");
     199                 : 
     200 GIC           1 :                 base_yylval = cur_yylval;
     201               1 :                 base_yylloc = cur_yylloc;
     202               1 :                 base_yytext = cur_yytext;
     203                 : 
     204 ECB             :                 /* Combine 3 tokens into 1 */
     205 CBC           1 :                 base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr);
     206 ECB             : 
     207                 :                 /* Clear have_lookahead, thereby consuming all three tokens */
     208 GIC           1 :                 have_lookahead = false;
     209 ECB             :             }
     210                 : 
     211 CBC           3 :             if (cur_token == UIDENT)
     212 GBC           1 :                 cur_token = IDENT;
     213 GIC           2 :             else if (cur_token == USCONST)
     214               2 :                 cur_token = SCONST;
     215               3 :             break;
     216                 :     }
     217                 : 
     218 CBC          52 :     return cur_token;
     219 ECB             : }
     220 EUB             : 
     221                 : /*
     222 ECB             :  * check_uescapechar() and ecpg_isspace() should match their equivalents
     223                 :  * in pgc.l.
     224                 :  */
     225                 : 
     226                 : /* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */
     227                 : static bool
     228 GIC           1 : check_uescapechar(unsigned char escape)
     229                 : {
     230 CBC           1 :     if (isxdigit(escape)
     231 GIC           1 :         || escape == '+'
     232               1 :         || escape == '\''
     233 CBC           1 :         || escape == '"'
     234               1 :         || ecpg_isspace(escape))
     235 LBC           0 :         return false;
     236 ECB             :     else
     237 CBC           1 :         return true;
     238                 : }
     239                 : 
     240 ECB             : /*
     241                 :  * ecpg_isspace() --- return true if flex scanner considers char whitespace
     242                 :  */
     243                 : static bool
     244 GIC           1 : ecpg_isspace(char ch)
     245                 : {
     246               1 :     if (ch == ' ' ||
     247               1 :         ch == '\t' ||
     248               1 :         ch == '\n' ||
     249               1 :         ch == '\r' ||
     250 ECB             :         ch == '\f')
     251 UIC           0 :         return true;
     252 CBC           1 :     return false;
     253 ECB             : }

Generated by: LCOV version v1.16-55-g56c0a2a