LCOV - Differential Code Coverage HEAD vs 15 - src/backend/parser/parser.c

LCOV - differential code coverage report

Current view:	top level - src/backend/parser - parser.c (source / functions)		Coverage	Total	Hit	LBC	UIC	UBC	GBC	GIC	GNC	CBC	EUB	ECB
Current:	Differential Code Coverage HEAD vs 15	Lines:	94.7 %	207	196	6	4	1	3	79	16	98	7	92
Current Date:	2023-04-08 17:13:01	Functions:	100.0 %	6	6					4	1	1		4
Baseline:	15	Line coverage date bins:
Baseline Date:	2023-04-08 15:09:40	[..60] days:	100.0 %	16	16						16
Legend:	Lines: hit not hit	(240..) days:	94.2 %	191	180	6	4	1	3	79		98	7	84
		Function coverage date bins:
		(240..) days:	60.0 %	10	6					4	1	1		4

 Age         Owner                  TLA  Line data    Source code

                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * parser.c
                                  4                 :  *      Main entry point/driver for PostgreSQL grammar
                                  5                 :  *
                                  6                 :  * Note that the grammar is not allowed to perform any table access
                                  7                 :  * (since we need to be able to do basic parsing even while inside an
                                  8                 :  * aborted transaction).  Therefore, the data structures returned by
                                  9                 :  * the grammar are "raw" parsetrees that still need to be analyzed by
                                 10                 :  * analyze.c and related files.
                                 11                 :  *
                                 12                 :  *
                                 13                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 14                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 15                 :  *
                                 16                 :  * IDENTIFICATION
                                 17                 :  *    src/backend/parser/parser.c
                                 18                 :  *
                                 19                 :  *-------------------------------------------------------------------------
                                 20                 :  */
                                 21                 : 
                                 22                 : #include "postgres.h"
                                 23                 : 
                                 24                 : #include "mb/pg_wchar.h"
                                 25                 : #include "gramparse.h"
                                 26                 : #include "parser/parser.h"
                                 27                 : #include "parser/scansup.h"
                                 28                 : 
                                 29                 : static bool check_uescapechar(unsigned char escape);
                                 30                 : static char *str_udeescape(const char *str, char escape,
                                 31                 :                            int position, core_yyscan_t yyscanner);
                                 32                 : 
                                 33                 : 
                                 34                 : /*
                                 35                 :  * raw_parser
                                 36                 :  *      Given a query in string form, do lexical and grammatical analysis.
                                 37                 :  *
                                 38                 :  * Returns a list of raw (un-analyzed) parse trees.  The contents of the
                                 39                 :  * list have the form required by the specified RawParseMode.
                                 40                 :  */
                                 41                 : List *
  825 tgl                        42 CBC      498144 : raw_parser(const char *str, RawParseMode mode)
                                 43                 : {
                                 44                 :     core_yyscan_t yyscanner;
                                 45                 :     base_yy_extra_type yyextra;
                                 46                 :     int         yyresult;
                                 47                 : 
                                 48                 :     /* initialize the flex scanner */
 4899                            49          498144 :     yyscanner = scanner_init(str, &yyextra.core_yy_extra,
                                 50                 :                              &ScanKeywords, ScanKeywordTokens);
                                 51                 : 
                                 52                 :     /* base_yylex() only needs us to initialize the lookahead token, if any */
  825                            53          498144 :     if (mode == RAW_PARSE_DEFAULT)
                                 54          476010 :         yyextra.have_lookahead = false;
                                 55                 :     else
                                 56                 :     {
                                 57                 :         /* this array is indexed by RawParseMode enum */
                                 58                 :         static const int mode_token[] = {
                                 59                 :             0,                  /* RAW_PARSE_DEFAULT */
                                 60                 :             MODE_TYPE_NAME,     /* RAW_PARSE_TYPE_NAME */
                                 61                 :             MODE_PLPGSQL_EXPR,  /* RAW_PARSE_PLPGSQL_EXPR */
                                 62                 :             MODE_PLPGSQL_ASSIGN1,   /* RAW_PARSE_PLPGSQL_ASSIGN1 */
                                 63                 :             MODE_PLPGSQL_ASSIGN2,   /* RAW_PARSE_PLPGSQL_ASSIGN2 */
                                 64                 :             MODE_PLPGSQL_ASSIGN3    /* RAW_PARSE_PLPGSQL_ASSIGN3 */
                                 65                 :         };
                                 66                 : 
                                 67           22134 :         yyextra.have_lookahead = true;
                                 68           22134 :         yyextra.lookahead_token = mode_token[mode];
                                 69           22134 :         yyextra.lookahead_yylloc = 0;
                                 70           22134 :         yyextra.lookahead_end = NULL;
                                 71                 :     }
                                 72                 : 
                                 73                 :     /* initialize the bison parser */
 5018                            74          498144 :     parser_init(&yyextra);
                                 75                 : 
                                 76                 :     /* Parse! */
                                 77          498144 :     yyresult = base_yyparse(yyscanner);
                                 78                 : 
                                 79                 :     /* Clean up (release memory) */
                                 80          497597 :     scanner_finish(yyscanner);
                                 81                 : 
 9173 bruce                      82          497597 :     if (yyresult)               /* error */
 8219 tgl                        83 UBC           0 :         return NIL;
                                 84                 : 
 5018 tgl                        85 CBC      497597 :     return yyextra.parsetree;
                                 86                 : }
                                 87                 : 
                                 88                 : 
                                 89                 : /*
                                 90                 :  * Intermediate filter between parser and core lexer (core_yylex in scan.l).
                                 91                 :  *
                                 92                 :  * This filter is needed because in some cases the standard SQL grammar
                                 93                 :  * requires more than one token lookahead.  We reduce these cases to one-token
                                 94                 :  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
                                 95                 :  *
                                 96                 :  * Using a filter is simpler than trying to recognize multiword tokens
                                 97                 :  * directly in scan.l, because we'd have to allow for comments between the
                                 98                 :  * words.  Furthermore it's not clear how to do that without re-introducing
                                 99                 :  * scanner backtrack, which would cost more performance than this filter
                                100                 :  * layer does.
                                101                 :  *
                                102                 :  * We also use this filter to convert UIDENT and USCONST sequences into
                                103                 :  * plain IDENT and SCONST tokens.  While that could be handled by additional
                                104                 :  * productions in the main grammar, it's more efficient to do it like this.
                                105                 :  *
                                106                 :  * The filter also provides a convenient place to translate between
                                107                 :  * the core_YYSTYPE and YYSTYPE representations (which are really the
                                108                 :  * same thing anyway, but notationally they're different).
                                109                 :  */
                                110                 : int
 4899                           111        16891748 : base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner)
                                112                 : {
 5018                           113        16891748 :     base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
                                114                 :     int         cur_token;
                                115                 :     int         next_token;
                                116                 :     int         cur_token_length;
                                117                 :     YYLTYPE     cur_yylloc;
                                118                 : 
                                119                 :     /* Get next token --- we might already have it */
                                120        16891748 :     if (yyextra->have_lookahead)
                                121                 :     {
                                122           90571 :         cur_token = yyextra->lookahead_token;
 4899                           123           90571 :         lvalp->core_yystype = yyextra->lookahead_yylval;
 5018                           124           90571 :         *llocp = yyextra->lookahead_yylloc;
  825                           125           90571 :         if (yyextra->lookahead_end)
                                126           68437 :             *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
 5018                           127           90571 :         yyextra->have_lookahead = false;
                                128                 :     }
                                129                 :     else
 4899                           130        16801177 :         cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner);
                                131                 : 
                                132                 :     /*
                                133                 :      * If this token isn't one that requires lookahead, just return it.  If it
                                134                 :      * does, determine the token length.  (We could get that via strlen(), but
                                135                 :      * since we have such a small set of possibilities, hardwiring seems
                                136                 :      * feasible and more efficient --- at least for the fixed-length cases.)
                                137                 :      */
 6161                           138        16891631 :     switch (cur_token)
                                139                 :     {
   11 alvherre                  140 GNC        1265 :         case FORMAT:
                                141            1265 :             cur_token_length = 6;
                                142            1265 :             break;
 2951 tgl                       143 CBC       33598 :         case NOT:
                                144           33598 :             cur_token_length = 3;
                                145           33598 :             break;
 5934                           146            1148 :         case NULLS_P:
 2966                           147            1148 :             cur_token_length = 5;
                                148            1148 :             break;
                                149           31974 :         case WITH:
                                150           31974 :             cur_token_length = 4;
                                151           31974 :             break;
 1182                           152             133 :         case UIDENT:
 1182 tgl                       153 ECB             :         case USCONST:
 1182 tgl                       154 CBC         133 :             cur_token_length = strlen(yyextra->core_yy_extra.scanbuf + *llocp);
                                155             133 :             break;
   11 alvherre                  156 GNC         379 :         case WITHOUT:
                                157             379 :             cur_token_length = 7;
                                158             379 :             break;
 2966 tgl                       159 GIC    16823134 :         default:
 2966 tgl                       160 CBC    16823134 :             return cur_token;
 2966 tgl                       161 ECB             :     }
 5624 bruce                     162                 : 
 2966 tgl                       163                 :     /*
                                164                 :      * Identify end+1 of current token.  core_yylex() has temporarily stored a
                                165                 :      * '\0' here, and will undo that when we call it again.  We need to redo
                                166                 :      * it to fully revert the lookahead call for error reporting purposes.
                                167                 :      */
 2966 tgl                       168 GIC       68497 :     yyextra->lookahead_end = yyextra->core_yy_extra.scanbuf +
                                169           68497 :         *llocp + cur_token_length;
                                170           68497 :     Assert(*(yyextra->lookahead_end) == '\0');
                                171                 : 
                                172                 :     /*
                                173                 :      * Save and restore *llocp around the call.  It might look like we could
 2966 tgl                       174 ECB             :      * avoid this by just passing &lookahead_yylloc to core_yylex(), but that
                                175                 :      * does not work because flex actually holds onto the last-passed pointer
                                176                 :      * internally, and will use that for error reporting.  We need any error
                                177                 :      * reports to point to the current token, not the next one.
                                178                 :      */
 2966 tgl                       179 GIC       68497 :     cur_yylloc = *llocp;
                                180                 : 
                                181                 :     /* Get next token, saving outputs into lookahead variables */
                                182           68497 :     next_token = core_yylex(&(yyextra->lookahead_yylval), llocp, yyscanner);
                                183           68497 :     yyextra->lookahead_token = next_token;
                                184           68497 :     yyextra->lookahead_yylloc = *llocp;
 2966 tgl                       185 ECB             : 
 2966 tgl                       186 GIC       68497 :     *llocp = cur_yylloc;
                                187                 : 
 2966 tgl                       188 ECB             :     /* Now revert the un-truncation of the current token */
 2966 tgl                       189 CBC       68497 :     yyextra->lookahead_hold_char = *(yyextra->lookahead_end);
                                190           68497 :     *(yyextra->lookahead_end) = '\0';
                                191                 : 
                                192           68497 :     yyextra->have_lookahead = true;
                                193                 : 
                                194                 :     /* Replace cur_token if needed, based on lookahead */
                                195           68497 :     switch (cur_token)
 2966 tgl                       196 ECB             :     {
   11 alvherre                  197 GNC        1265 :         case FORMAT:
                                198                 :             /* Replace FORMAT by FORMAT_LA if it's followed by JSON */
                                199                 :             switch (next_token)
                                200                 :             {
                                201             187 :                 case JSON:
                                202             187 :                     cur_token = FORMAT_LA;
                                203             187 :                     break;
                                204                 :             }
                                205            1265 :             break;
                                206                 : 
 2951 tgl                       207 GIC       33598 :         case NOT:
 2951 tgl                       208 ECB             :             /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
                                209                 :             switch (next_token)
                                210                 :             {
 2951 tgl                       211 CBC        2985 :                 case BETWEEN:
                                212                 :                 case IN_P:
 2951 tgl                       213 ECB             :                 case LIKE:
                                214                 :                 case ILIKE:
                                215                 :                 case SIMILAR:
 2951 tgl                       216 GIC        2985 :                     cur_token = NOT_LA;
 2951 tgl                       217 CBC        2985 :                     break;
 2951 tgl                       218 ECB             :             }
 2951 tgl                       219 CBC       33598 :             break;
                                220                 : 
 2966                           221            1148 :         case NULLS_P:
                                222                 :             /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
 5934 tgl                       223 ECB             :             switch (next_token)
                                224                 :             {
 5934 tgl                       225 GIC         794 :                 case FIRST_P:
                                226                 :                 case LAST_P:
 2966 tgl                       227 CBC         794 :                     cur_token = NULLS_LA;
 5934 tgl                       228 GIC         794 :                     break;
                                229                 :             }
                                230            1148 :             break;
                                231                 : 
 5934 tgl                       232 CBC       31974 :         case WITH:
 2966 tgl                       233 ECB             :             /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
                                234                 :             switch (next_token)
 6161                           235                 :             {
 5276 peter_e                   236 GIC        1647 :                 case TIME:
 3541 stark                     237 ECB             :                 case ORDINALITY:
 2966 tgl                       238 GIC        1647 :                     cur_token = WITH_LA;
 6161                           239            1647 :                     break;
                                240                 :             }
 6161 tgl                       241 CBC       31974 :             break;
                                242                 : 
   11 alvherre                  243 GNC         379 :         case WITHOUT:
                                244                 :             /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
                                245                 :             switch (next_token)
                                246                 :             {
    5                           247             250 :                 case TIME:
   11                           248             250 :                     cur_token = WITHOUT_LA;
                                249             250 :                     break;
                                250                 :             }
                                251             379 :             break;
                                252                 : 
 1182 tgl                       253 CBC         133 :         case UIDENT:
 1182 tgl                       254 ECB             :         case USCONST:
                                255                 :             /* Look ahead for UESCAPE */
 1182 tgl                       256 CBC         133 :             if (next_token == UESCAPE)
                                257                 :             {
 1182 tgl                       258 ECB             :                 /* Yup, so get third token, which had better be SCONST */
                                259                 :                 const char *escstr;
                                260                 : 
                                261                 :                 /* Again save and restore *llocp */
 1182 tgl                       262 CBC          22 :                 cur_yylloc = *llocp;
                                263                 : 
 1182 tgl                       264 ECB             :                 /* Un-truncate current token so errors point to third token */
 1182 tgl                       265 CBC          22 :                 *(yyextra->lookahead_end) = yyextra->lookahead_hold_char;
                                266                 : 
 1182 tgl                       267 ECB             :                 /* Get third token */
 1182 tgl                       268 GIC          22 :                 next_token = core_yylex(&(yyextra->lookahead_yylval),
 1182 tgl                       269 ECB             :                                         llocp, yyscanner);
                                270                 : 
                                271                 :                 /* If we throw error here, it will point to third token */
 1182 tgl                       272 GIC          22 :                 if (next_token != SCONST)
 1182 tgl                       273 CBC           3 :                     scanner_yyerror("UESCAPE must be followed by a simple string literal",
 1182 tgl                       274 ECB             :                                     yyscanner);
                                275                 : 
 1182 tgl                       276 GIC          19 :                 escstr = yyextra->lookahead_yylval.str;
 1182 tgl                       277 CBC          19 :                 if (strlen(escstr) != 1 || !check_uescapechar(escstr[0]))
 1182 tgl                       278 GIC           3 :                     scanner_yyerror("invalid Unicode escape character",
 1182 tgl                       279 ECB             :                                     yyscanner);
                                280                 : 
                                281                 :                 /* Now restore *llocp; errors will point to first token */
 1182 tgl                       282 CBC          16 :                 *llocp = cur_yylloc;
                                283                 : 
                                284                 :                 /* Apply Unicode conversion */
 1182 tgl                       285 GIC          16 :                 lvalp->core_yystype.str =
                                286              16 :                     str_udeescape(lvalp->core_yystype.str,
                                287              16 :                                   escstr[0],
 1182 tgl                       288 ECB             :                                   *llocp,
                                289                 :                                   yyscanner);
                                290                 : 
                                291                 :                 /*
                                292                 :                  * We don't need to revert the un-truncation of UESCAPE.  What
                                293                 :                  * we do want to do is clear have_lookahead, thereby consuming
                                294                 :                  * all three tokens.
                                295                 :                  */
 1182 tgl                       296 GIC          16 :                 yyextra->have_lookahead = false;
                                297                 :             }
 1182 tgl                       298 ECB             :             else
                                299                 :             {
                                300                 :                 /* No UESCAPE, so convert using default escape character */
 1182 tgl                       301 GIC          87 :                 lvalp->core_yystype.str =
 1182 tgl                       302 CBC         111 :                     str_udeescape(lvalp->core_yystype.str,
 1182 tgl                       303 ECB             :                                   '\\',
                                304                 :                                   *llocp,
                                305                 :                                   yyscanner);
                                306                 :             }
                                307                 : 
 1182 tgl                       308 CBC         103 :             if (cur_token == UIDENT)
                                309                 :             {
                                310                 :                 /* It's an identifier, so truncate as appropriate */
                                311              13 :                 truncate_identifier(lvalp->core_yystype.str,
                                312              13 :                                     strlen(lvalp->core_yystype.str),
 1182 tgl                       313 ECB             :                                     true);
 1182 tgl                       314 GIC          13 :                 cur_token = IDENT;
                                315                 :             }
                                316              90 :             else if (cur_token == USCONST)
                                317                 :             {
                                318              90 :                 cur_token = SCONST;
                                319                 :             }
                                320             103 :             break;
                                321                 :     }
 6161 tgl                       322 ECB             : 
 6161 tgl                       323 GIC       68467 :     return cur_token;
                                324                 : }
                                325                 : 
                                326                 : /* convert hex digit (caller should have verified that) to value */
 1182 tgl                       327 ECB             : static unsigned int
 1182 tgl                       328 CBC         794 : hexval(unsigned char c)
                                329                 : {
 1182 tgl                       330 GIC         794 :     if (c >= '0' && c <= '9')
                                331             669 :         return c - '0';
                                332             125 :     if (c >= 'a' && c <= 'f')
                                333              30 :         return c - 'a' + 0xA;
 1182 tgl                       334 CBC          95 :     if (c >= 'A' && c <= 'F')
 1182 tgl                       335 GIC          95 :         return c - 'A' + 0xA;
 1182 tgl                       336 UIC           0 :     elog(ERROR, "invalid hexadecimal digit");
 1182 tgl                       337 ECB             :     return 0;                   /* not reached */
                                338                 : }
                                339                 : 
 1129                           340                 : /* is Unicode code point acceptable? */
                                341                 : static void
 1129 tgl                       342 CBC         190 : check_unicode_value(pg_wchar c)
                                343                 : {
                                344             190 :     if (!is_valid_unicode_codepoint(c))
 1182 tgl                       345 GIC           3 :         ereport(ERROR,
 1182 tgl                       346 ECB             :                 (errcode(ERRCODE_SYNTAX_ERROR),
                                347                 :                  errmsg("invalid Unicode escape value")));
 1182 tgl                       348 GIC         187 : }
 1182 tgl                       349 ECB             : 
                                350                 : /* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */
                                351                 : static bool
 1182 tgl                       352 GIC          19 : check_uescapechar(unsigned char escape)
                                353                 : {
 1182 tgl                       354 CBC          19 :     if (isxdigit(escape)
 1182 tgl                       355 GIC          19 :         || escape == '+'
 1182 tgl                       356 CBC          16 :         || escape == '\''
                                357              16 :         || escape == '"'
                                358              16 :         || scanner_isspace(escape))
                                359               3 :         return false;
 1182 tgl                       360 ECB             :     else
 1182 tgl                       361 CBC          16 :         return true;
 1182 tgl                       362 EUB             : }
                                363                 : 
                                364                 : /*
                                365                 :  * Process Unicode escapes in "str", producing a palloc'd plain string
                                366                 :  *
                                367                 :  * escape: the escape character to use
 1182 tgl                       368 ECB             :  * position: start position of U&'' or U&"" string token
                                369                 :  * yyscanner: context information needed for error reports
                                370                 :  */
                                371                 : static char *
 1182 tgl                       372 GIC         127 : str_udeescape(const char *str, char escape,
                                373                 :               int position, core_yyscan_t yyscanner)
 1182 tgl                       374 ECB             : {
                                375                 :     const char *in;
                                376                 :     char       *new,
                                377                 :                *out;
 1129                           378                 :     size_t      new_len;
 1182 tgl                       379 GIC         127 :     pg_wchar    pair_first = 0;
 1129 tgl                       380 ECB             :     ScannerCallbackState scbstate;
 1182                           381                 : 
                                382                 :     /*
 1129                           383                 :      * Guesstimate that result will be no longer than input, but allow enough
                                384                 :      * padding for Unicode conversion.
 1182                           385                 :      */
 1129 tgl                       386 GIC         127 :     new_len = strlen(str) + MAX_UNICODE_EQUIVALENT_STRING + 1;
 1129 tgl                       387 CBC         127 :     new = palloc(new_len);
                                388                 : 
 1182 tgl                       389 GIC         127 :     in = str;
                                390             127 :     out = new;
                                391             696 :     while (*in)
                                392                 :     {
                                393                 :         /* Enlarge string if needed */
 1129                           394             590 :         size_t      out_dist = out - new;
                                395                 : 
                                396             590 :         if (out_dist > new_len - (MAX_UNICODE_EQUIVALENT_STRING + 1))
                                397                 :         {
 1129 tgl                       398 LBC           0 :             new_len *= 2;
 1129 tgl                       399 UIC           0 :             new = repalloc(new, new_len);
                                400               0 :             out = new + out_dist;
                                401                 :         }
                                402                 : 
 1182 tgl                       403 GIC         590 :         if (in[0] == escape)
                                404                 :         {
 1129 tgl                       405 ECB             :             /*
                                406                 :              * Any errors reported while processing this escape sequence will
                                407                 :              * have an error cursor pointing at the escape.
                                408                 :              */
 1129 tgl                       409 GIC         202 :             setup_scanner_errposition_callback(&scbstate, yyscanner,
                                410             202 :                                                in - str + position + 3);    /* 3 for U&" */
 1182                           411             202 :             if (in[1] == escape)
 1182 tgl                       412 ECB             :             {
 1182 tgl                       413 CBC           6 :                 if (pair_first)
 1182 tgl                       414 GIC           3 :                     goto invalid_pair;
 1182 tgl                       415 CBC           3 :                 *out++ = escape;
                                416               3 :                 in += 2;
 1182 tgl                       417 ECB             :             }
 1182 tgl                       418 GIC         196 :             else if (isxdigit((unsigned char) in[1]) &&
                                419             176 :                      isxdigit((unsigned char) in[2]) &&
 1182 tgl                       420 CBC         176 :                      isxdigit((unsigned char) in[3]) &&
 1182 tgl                       421 GIC         176 :                      isxdigit((unsigned char) in[4]))
 1182 tgl                       422 CBC         170 :             {
                                423                 :                 pg_wchar    unicode;
 1182 tgl                       424 EUB             : 
 1182 tgl                       425 GBC         173 :                 unicode = (hexval(in[1]) << 12) +
                                426             173 :                     (hexval(in[2]) << 8) +
 1182 tgl                       427 GIC         173 :                     (hexval(in[3]) << 4) +
                                428             173 :                     hexval(in[4]);
 1129 tgl                       429 CBC         173 :                 check_unicode_value(unicode);
 1182 tgl                       430 GIC         173 :                 if (pair_first)
                                431                 :                 {
                                432               3 :                     if (is_utf16_surrogate_second(unicode))
                                433                 :                     {
 1182 tgl                       434 UIC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
 1182 tgl                       435 LBC           0 :                         pair_first = 0;
 1182 tgl                       436 ECB             :                     }
                                437                 :                     else
 1182 tgl                       438 GIC           3 :                         goto invalid_pair;
 1182 tgl                       439 ECB             :                 }
 1182 tgl                       440 CBC         170 :                 else if (is_utf16_surrogate_second(unicode))
 1182 tgl                       441 LBC           0 :                     goto invalid_pair;
 1182 tgl                       442 ECB             : 
 1182 tgl                       443 GIC         170 :                 if (is_utf16_surrogate_first(unicode))
 1182 tgl                       444 CBC          12 :                     pair_first = unicode;
 1182 tgl                       445 ECB             :                 else
                                446                 :                 {
 1129 tgl                       447 CBC         158 :                     pg_unicode_to_server(unicode, (unsigned char *) out);
                                448             158 :                     out += strlen(out);
                                449                 :                 }
 1182 tgl                       450 GIC         170 :                 in += 5;
 1182 tgl                       451 ECB             :             }
 1182 tgl                       452 CBC          23 :             else if (in[1] == '+' &&
                                453              20 :                      isxdigit((unsigned char) in[2]) &&
                                454              20 :                      isxdigit((unsigned char) in[3]) &&
                                455              20 :                      isxdigit((unsigned char) in[4]) &&
                                456              20 :                      isxdigit((unsigned char) in[5]) &&
 1182 tgl                       457 GIC          20 :                      isxdigit((unsigned char) in[6]) &&
 1182 tgl                       458 CBC          17 :                      isxdigit((unsigned char) in[7]))
 1182 tgl                       459 GIC          11 :             {
 1182 tgl                       460 EUB             :                 pg_wchar    unicode;
                                461                 : 
 1182 tgl                       462 GIC          17 :                 unicode = (hexval(in[2]) << 20) +
                                463              17 :                     (hexval(in[3]) << 16) +
 1182 tgl                       464 CBC          17 :                     (hexval(in[4]) << 12) +
 1182 tgl                       465 GIC          17 :                     (hexval(in[5]) << 8) +
 1182 tgl                       466 CBC          17 :                     (hexval(in[6]) << 4) +
 1182 tgl                       467 GBC          17 :                     hexval(in[7]);
 1129 tgl                       468 GIC          17 :                 check_unicode_value(unicode);
 1182 tgl                       469 CBC          14 :                 if (pair_first)
 1182 tgl                       470 ECB             :                 {
 1182 tgl                       471 GIC           3 :                     if (is_utf16_surrogate_second(unicode))
                                472                 :                     {
 1182 tgl                       473 LBC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                                474               0 :                         pair_first = 0;
                                475                 :                     }
 1182 tgl                       476 ECB             :                     else
 1182 tgl                       477 GIC           3 :                         goto invalid_pair;
 1182 tgl                       478 ECB             :                 }
 1182 tgl                       479 CBC          11 :                 else if (is_utf16_surrogate_second(unicode))
 1182 tgl                       480 LBC           0 :                     goto invalid_pair;
 1182 tgl                       481 ECB             : 
 1182 tgl                       482 CBC          11 :                 if (is_utf16_surrogate_first(unicode))
                                483               3 :                     pair_first = unicode;
 1182 tgl                       484 ECB             :                 else
                                485                 :                 {
 1129 tgl                       486 GIC           8 :                     pg_unicode_to_server(unicode, (unsigned char *) out);
                                487               8 :                     out += strlen(out);
 1182 tgl                       488 ECB             :                 }
 1182 tgl                       489 CBC          11 :                 in += 8;
 1182 tgl                       490 ECB             :             }
                                491                 :             else
 1182 tgl                       492 CBC           6 :                 ereport(ERROR,
 1182 tgl                       493 ECB             :                         (errcode(ERRCODE_SYNTAX_ERROR),
 1129                           494                 :                          errmsg("invalid Unicode escape"),
                                495                 :                          errhint("Unicode escapes must be \\XXXX or \\+XXXXXX.")));
                                496                 : 
 1129 tgl                       497 CBC         184 :             cancel_scanner_errposition_callback(&scbstate);
                                498                 :         }
 1182 tgl                       499 EUB             :         else
                                500                 :         {
 1182 tgl                       501 GIC         388 :             if (pair_first)
                                502               3 :                 goto invalid_pair;
 1182 tgl                       503 ECB             : 
 1182 tgl                       504 GIC         385 :             *out++ = *in++;
 1182 tgl                       505 ECB             :         }
 1182 tgl                       506 EUB             :     }
                                507                 : 
 1182 tgl                       508 ECB             :     /* unfinished surrogate pair? */
 1182 tgl                       509 CBC         106 :     if (pair_first)
 1182 tgl                       510 GIC           3 :         goto invalid_pair;
                                511                 : 
 1182 tgl                       512 CBC         103 :     *out = '\0';
 1129                           513             103 :     return new;
                                514                 : 
 1182 tgl                       515 ECB             :     /*
                                516                 :      * We might get here with the error callback active, or not.  Call
                                517                 :      * scanner_errposition to make sure an error cursor appears; if the
 1129                           518                 :      * callback is active, this is duplicative but harmless.
                                519                 :      */
 1182 tgl                       520 GIC          15 : invalid_pair:
                                521              15 :     ereport(ERROR,
                                522                 :             (errcode(ERRCODE_SYNTAX_ERROR),
 1182 tgl                       523 ECB             :              errmsg("invalid Unicode surrogate pair"),
                                524                 :              scanner_errposition(in - str + position + 3,   /* 3 for U&" */
                                525                 :                                  yyscanner)));
                                526                 :     return NULL;                /* keep compiler quiet */
                                527                 : }

Generated by: LCOV version v1.16-55-g56c0a2a