LCOV - differential code coverage report
Current view: top level - src/backend/nodes - read.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 62.1 % 177 110 28 10 14 15 10 55 6 39 39 58 3 3
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 6 6 6 6
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * read.c
       4                 :  *    routines to convert a string (legal ascii representation of node) back
       5                 :  *    to nodes
       6                 :  *
       7                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       8                 :  * Portions Copyright (c) 1994, Regents of the University of California
       9                 :  *
      10                 :  *
      11                 :  * IDENTIFICATION
      12                 :  *    src/backend/nodes/read.c
      13                 :  *
      14                 :  * HISTORY
      15                 :  *    AUTHOR            DATE            MAJOR EVENT
      16                 :  *    Andrew Yu         Nov 2, 1994     file creation
      17                 :  *
      18                 :  *-------------------------------------------------------------------------
      19                 :  */
      20                 : #include "postgres.h"
      21                 : 
      22                 : #include <ctype.h>
      23                 : 
      24                 : #include "common/string.h"
      25                 : #include "nodes/bitmapset.h"
      26                 : #include "nodes/pg_list.h"
      27                 : #include "nodes/readfuncs.h"
      28                 : #include "nodes/value.h"
      29                 : 
      30                 : 
      31                 : /* Static state for pg_strtok */
      32                 : static const char *pg_strtok_ptr = NULL;
      33                 : 
      34                 : /* State flag that determines how readfuncs.c should treat location fields */
      35                 : #ifdef WRITE_READ_PARSE_PLAN_TREES
      36                 : bool        restore_location_fields = false;
      37                 : #endif
      38                 : 
      39                 : 
      40                 : /*
      41                 :  * stringToNode -
      42                 :  *    builds a Node tree from its string representation (assumed valid)
      43                 :  *
      44                 :  * restore_loc_fields instructs readfuncs.c whether to restore location
      45                 :  * fields rather than set them to -1.  This is currently only supported
      46                 :  * in builds with the WRITE_READ_PARSE_PLAN_TREES debugging flag set.
      47                 :  */
      48                 : static void *
      49 GIC      254072 : stringToNodeInternal(const char *str, bool restore_loc_fields)
      50 ECB             : {
      51                 :     void       *retval;
      52                 :     const char *save_strtok;
      53                 : #ifdef WRITE_READ_PARSE_PLAN_TREES
      54                 :     bool        save_restore_location_fields;
      55                 : #endif
      56                 : 
      57                 :     /*
      58                 :      * We save and restore the pre-existing state of pg_strtok. This makes the
      59                 :      * world safe for re-entrant invocation of stringToNode, without incurring
      60                 :      * a lot of notational overhead by having to pass the next-character
      61                 :      * pointer around through all the readfuncs.c code.
      62                 :      */
      63 GIC      254072 :     save_strtok = pg_strtok_ptr;
      64 ECB             : 
      65 GIC      254072 :     pg_strtok_ptr = str;        /* point pg_strtok at the string to read */
      66 ECB             : 
      67                 :     /*
      68                 :      * If enabled, likewise save/restore the location field handling flag.
      69                 :      */
      70                 : #ifdef WRITE_READ_PARSE_PLAN_TREES
      71                 :     save_restore_location_fields = restore_location_fields;
      72                 :     restore_location_fields = restore_loc_fields;
      73                 : #endif
      74                 : 
      75 GIC      254072 :     retval = nodeRead(NULL, 0); /* do the reading */
      76 ECB             : 
      77 GIC      254072 :     pg_strtok_ptr = save_strtok;
      78 ECB             : 
      79                 : #ifdef WRITE_READ_PARSE_PLAN_TREES
      80                 :     restore_location_fields = save_restore_location_fields;
      81                 : #endif
      82                 : 
      83 GIC      254072 :     return retval;
      84 ECB             : }
      85                 : 
      86                 : /*
      87                 :  * Externally visible entry points
      88                 :  */
      89                 : void *
      90 GIC      254072 : stringToNode(const char *str)
      91 ECB             : {
      92 GIC      254072 :     return stringToNodeInternal(str, false);
      93 ECB             : }
      94                 : 
      95                 : #ifdef WRITE_READ_PARSE_PLAN_TREES
      96                 : 
      97                 : void *
      98                 : stringToNodeWithLocations(const char *str)
      99                 : {
     100                 :     return stringToNodeInternal(str, true);
     101                 : }
     102                 : 
     103                 : #endif
     104                 : 
     105                 : 
     106                 : /*****************************************************************************
     107                 :  *
     108                 :  * the lisp token parser
     109                 :  *
     110                 :  *****************************************************************************/
     111                 : 
     112                 : /*
     113                 :  * pg_strtok --- retrieve next "token" from a string.
     114                 :  *
     115                 :  * Works kinda like strtok, except it never modifies the source string.
     116                 :  * (Instead of storing nulls into the string, the length of the token
     117                 :  * is returned to the caller.)
     118                 :  * Also, the rules about what is a token are hard-wired rather than being
     119                 :  * configured by passing a set of terminating characters.
     120                 :  *
     121                 :  * The string is assumed to have been initialized already by stringToNode.
     122                 :  *
     123                 :  * The rules for tokens are:
     124                 :  *  * Whitespace (space, tab, newline) always separates tokens.
     125                 :  *  * The characters '(', ')', '{', '}' form individual tokens even
     126                 :  *    without any whitespace around them.
     127                 :  *  * Otherwise, a token is all the characters up to the next whitespace
     128                 :  *    or occurrence of one of the four special characters.
     129                 :  *  * A backslash '\' can be used to quote whitespace or one of the four
     130                 :  *    special characters, so that it is treated as a plain token character.
     131                 :  *    Backslashes themselves must also be backslashed for consistency.
     132                 :  *    Any other character can be, but need not be, backslashed as well.
     133                 :  *  * If the resulting token is '<>' (with no backslash), it is returned
     134                 :  *    as a non-NULL pointer to the token but with length == 0.  Note that
     135                 :  *    there is no other way to get a zero-length token.
     136                 :  *
     137                 :  * Returns a pointer to the start of the next token, and the length of the
     138                 :  * token (including any embedded backslashes!) in *length.  If there are
     139                 :  * no more tokens, NULL and 0 are returned.
     140                 :  *
     141                 :  * NOTE: this routine doesn't remove backslashes; the caller must do so
     142                 :  * if necessary (see "debackslash").
     143                 :  *
     144                 :  * NOTE: prior to release 7.0, this routine also had a special case to treat
     145                 :  * a token starting with '"' as extending to the next '"'.  This code was
     146                 :  * broken, however, since it would fail to cope with a string containing an
     147                 :  * embedded '"'.  I have therefore removed this special case, and instead
     148                 :  * introduced rules for using backslashes to quote characters.  Higher-level
     149                 :  * code should add backslashes to a string constant to ensure it is treated
     150                 :  * as a single token.
     151                 :  */
     152                 : const char *
     153 GIC   222259213 : pg_strtok(int *length)
     154 ECB             : {
     155                 :     const char *local_str;      /* working pointer to string */
     156                 :     const char *ret_str;        /* start of token to return */
     157                 : 
     158 GIC   222259213 :     local_str = pg_strtok_ptr;
     159 ECB             : 
     160 GIC   410837470 :     while (*local_str == ' ' || *local_str == '\n' || *local_str == '\t')
     161 CBC   188578257 :         local_str++;
     162 ECB             : 
     163 GIC   222259213 :     if (*local_str == '\0')
     164 ECB             :     {
     165 UIC           0 :         *length = 0;
     166 UBC           0 :         pg_strtok_ptr = local_str;
     167               0 :         return NULL;            /* no more tokens */
     168 EUB             :     }
     169                 : 
     170                 :     /*
     171                 :      * Now pointing at start of next token.
     172                 :      */
     173 GIC   222259213 :     ret_str = local_str;
     174 ECB             : 
     175 GIC   222259213 :     if (*local_str == '(' || *local_str == ')' ||
     176 CBC   207843109 :         *local_str == '{' || *local_str == '}')
     177 ECB             :     {
     178                 :         /* special 1-character token */
     179 GIC    33429390 :         local_str++;
     180 ECB             :     }
     181                 :     else
     182                 :     {
     183                 :         /* Normal token, possibly containing backslashes */
     184 GIC   188829823 :         while (*local_str != '\0' &&
     185 CBC  1328225552 :                *local_str != ' ' && *local_str != '\n' &&
     186      1153985411 :                *local_str != '\t' &&
     187      1153985411 :                *local_str != '(' && *local_str != ')' &&
     188      2476633182 :                *local_str != '{' && *local_str != '}')
     189 ECB             :         {
     190 GIC  1139454946 :             if (*local_str == '\\' && local_str[1] != '\0')
     191 CBC       54104 :                 local_str += 2;
     192 ECB             :             else
     193 GIC  1139400842 :                 local_str++;
     194 ECB             :         }
     195                 :     }
     196                 : 
     197 GIC   222259213 :     *length = local_str - ret_str;
     198 ECB             : 
     199                 :     /* Recognize special case for "empty" token */
     200 GIC   222259213 :     if (*length == 2 && ret_str[0] == '<' && ret_str[1] == '>')
     201 CBC     4165194 :         *length = 0;
     202 ECB             : 
     203 GIC   222259213 :     pg_strtok_ptr = local_str;
     204 ECB             : 
     205 GIC   222259213 :     return ret_str;
     206 ECB             : }
     207                 : 
     208                 : /*
     209                 :  * debackslash -
     210                 :  *    create a palloc'd string holding the given token.
     211                 :  *    any protective backslashes in the token are removed.
     212                 :  */
     213                 : char *
     214 GIC     7918087 : debackslash(const char *token, int length)
     215 ECB             : {
     216 GIC     7918087 :     char       *result = palloc(length + 1);
     217 CBC     7918087 :     char       *ptr = result;
     218 ECB             : 
     219 GIC    83206130 :     while (length > 0)
     220 ECB             :     {
     221 GIC    75288043 :         if (*token == '\\' && length > 1)
     222 CBC       54104 :             token++, length--;
     223        75288043 :         *ptr++ = *token++;
     224        75288043 :         length--;
     225 ECB             :     }
     226 GIC     7918087 :     *ptr = '\0';
     227 CBC     7918087 :     return result;
     228 ECB             : }
     229                 : 
     230                 : #define RIGHT_PAREN (1000000 + 1)
     231                 : #define LEFT_PAREN  (1000000 + 2)
     232                 : #define LEFT_BRACE  (1000000 + 3)
     233                 : #define OTHER_TOKEN (1000000 + 4)
     234                 : 
     235                 : /*
     236                 :  * nodeTokenType -
     237                 :  *    returns the type of the node token contained in token.
     238                 :  *    It returns one of the following valid NodeTags:
     239                 :  *      T_Integer, T_Float, T_Boolean, T_String, T_BitString
     240                 :  *    and some of its own:
     241                 :  *      RIGHT_PAREN, LEFT_PAREN, LEFT_BRACE, OTHER_TOKEN
     242                 :  *
     243                 :  *    Assumption: the ascii representation is legal
     244                 :  */
     245                 : static NodeTag
     246 GIC    22146526 : nodeTokenType(const char *token, int length)
     247 ECB             : {
     248                 :     NodeTag     retval;
     249                 :     const char *numptr;
     250                 :     int         numlen;
     251                 : 
     252                 :     /*
     253                 :      * Check if the token is a number
     254                 :      */
     255 GIC    22146526 :     numptr = token;
     256 CBC    22146526 :     numlen = length;
     257        22146526 :     if (*numptr == '+' || *numptr == '-')
     258 LBC           0 :         numptr++, numlen--;
     259 GBC    22146526 :     if ((numlen > 0 && isdigit((unsigned char) *numptr)) ||
     260 CBC     6536021 :         (numlen > 1 && *numptr == '.' && isdigit((unsigned char) numptr[1])))
     261 ECB             :     {
     262                 :         /*
     263                 :          * Yes.  Figure out whether it is integral or float; this requires
     264                 :          * both a syntax check and a range check. strtoint() can do both for
     265                 :          * us. We know the token will end at a character that strtoint will
     266                 :          * stop at, so we do not need to modify the string.
     267                 :          */
     268                 :         char       *endptr;
     269                 : 
     270 UIC           0 :         errno = 0;
     271 UNC           0 :         (void) strtoint(numptr, &endptr, 10);
     272 UBC           0 :         if (endptr != token + length || errno == ERANGE)
     273               0 :             return T_Float;
     274               0 :         return T_Integer;
     275 EUB             :     }
     276                 : 
     277                 :     /*
     278                 :      * these three cases do not need length checks, since pg_strtok() will
     279                 :      * always treat them as single-byte tokens
     280                 :      */
     281 GIC    22146526 :     else if (*token == '(')
     282 CBC     2103298 :         retval = LEFT_PAREN;
     283        20043228 :     else if (*token == ')')
     284 LBC           0 :         retval = RIGHT_PAREN;
     285 GBC    20043228 :     else if (*token == '{')
     286 CBC     9506643 :         retval = LEFT_BRACE;
     287        10536585 :     else if ((length == 4 && strncmp(token, "true", 4) == 0) ||
     288          339396 :              (length == 5 && strncmp(token, "false", 5) == 0))
     289 LBC           0 :         retval = T_Boolean;
     290 GBC    10536585 :     else if (*token == '"' && length > 1 && token[length - 1] == '"')
     291 CBC     6536021 :         retval = T_String;
     292 GNC     4000564 :     else if (*token == 'b' || *token == 'x')
     293 LBC           0 :         retval = T_BitString;
     294 EUB             :     else
     295 GIC     4000564 :         retval = OTHER_TOKEN;
     296 CBC    22146526 :     return retval;
     297 ECB             : }
     298                 : 
     299                 : /*
     300                 :  * nodeRead -
     301                 :  *    Slightly higher-level reader.
     302                 :  *
     303                 :  * This routine applies some semantic knowledge on top of the purely
     304                 :  * lexical tokenizer pg_strtok().   It can read
     305                 :  *  * Value token nodes (integers, floats, booleans, or strings);
     306                 :  *  * General nodes (via parseNodeString() from readfuncs.c);
     307                 :  *  * Lists of the above;
     308                 :  *  * Lists of integers, OIDs, or TransactionIds.
     309                 :  * The return value is declared void *, not Node *, to avoid having to
     310                 :  * cast it explicitly in callers that assign to fields of different types.
     311                 :  *
     312                 :  * External callers should always pass NULL/0 for the arguments.  Internally
     313                 :  * a non-NULL token may be passed when the upper recursion level has already
     314                 :  * scanned the first token of a node's representation.
     315                 :  *
     316                 :  * We assume pg_strtok is already initialized with a string to read (hence
     317                 :  * this should only be invoked from within a stringToNode operation).
     318                 :  */
     319                 : void *
     320 GIC    22146526 : nodeRead(const char *token, int tok_len)
     321 ECB             : {
     322                 :     Node       *result;
     323                 :     NodeTag     type;
     324                 : 
     325 GIC    22146526 :     if (token == NULL)          /* need to read a token? */
     326 ECB             :     {
     327 GIC     8871486 :         token = pg_strtok(&tok_len);
     328 ECB             : 
     329 GIC     8871486 :         if (token == NULL)      /* end of input */
     330 LBC           0 :             return NULL;
     331 EUB             :     }
     332                 : 
     333 GIC    22146526 :     type = nodeTokenType(token, tok_len);
     334 ECB             : 
     335 GIC    22146526 :     switch ((int) type)
     336 ECB             :     {
     337 GIC     9506643 :         case LEFT_BRACE:
     338 CBC     9506643 :             result = parseNodeString();
     339         9506643 :             token = pg_strtok(&tok_len);
     340         9506643 :             if (token == NULL || token[0] != '}')
     341 LBC           0 :                 elog(ERROR, "did not find '}' at end of input node");
     342 GBC     9506643 :             break;
     343 CBC     2103298 :         case LEFT_PAREN:
     344 ECB             :             {
     345 GIC     2103298 :                 List       *l = NIL;
     346 ECB             : 
     347                 :                 /*----------
     348                 :                  * Could be an integer list:    (i int int ...)
     349                 :                  * or an OID list:              (o int int ...)
     350                 :                  * or an XID list:              (x int int ...)
     351                 :                  * or a bitmapset:              (b int int ...)
     352                 :                  * or a list of nodes/values:   (node node ...)
     353                 :                  *----------
     354                 :                  */
     355 GIC     2103298 :                 token = pg_strtok(&tok_len);
     356         2103298 :                 if (token == NULL)
     357 UIC           0 :                     elog(ERROR, "unterminated List structure");
     358 CBC     2103298 :                 if (tok_len == 1 && token[0] == 'i')
     359 ECB             :                 {
     360 EUB             :                     /* List of integers */
     361 ECB             :                     for (;;)
     362 GIC     3291055 :                     {
     363                 :                         int         val;
     364                 :                         char       *endptr;
     365 ECB             : 
     366 GIC     3434670 :                         token = pg_strtok(&tok_len);
     367         3434670 :                         if (token == NULL)
     368 UIC           0 :                             elog(ERROR, "unterminated List structure");
     369 CBC     3434670 :                         if (token[0] == ')')
     370          143615 :                             break;
     371 GBC     3291055 :                         val = (int) strtol(token, &endptr, 10);
     372 CBC     3291055 :                         if (endptr != token + tok_len)
     373 LBC           0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     374 ECB             :                                  tok_len, token);
     375 CBC     3291055 :                         l = lappend_int(l, val);
     376 EUB             :                     }
     377 GNC      143615 :                     result = (Node *) l;
     378                 :                 }
     379 CBC     1959683 :                 else if (tok_len == 1 && token[0] == 'o')
     380                 :                 {
     381 ECB             :                     /* List of OIDs */
     382                 :                     for (;;)
     383 CBC      198691 :                     {
     384                 :                         Oid         val;
     385                 :                         char       *endptr;
     386                 : 
     387          246445 :                         token = pg_strtok(&tok_len);
     388 GIC      246445 :                         if (token == NULL)
     389 UIC           0 :                             elog(ERROR, "unterminated List structure");
     390 GIC      246445 :                         if (token[0] == ')')
     391 CBC       47754 :                             break;
     392          198691 :                         val = (Oid) strtoul(token, &endptr, 10);
     393 GBC      198691 :                         if (endptr != token + tok_len)
     394 LBC           0 :                             elog(ERROR, "unrecognized OID: \"%.*s\"",
     395 ECB             :                                  tok_len, token);
     396 CBC      198691 :                         l = lappend_oid(l, val);
     397 ECB             :                     }
     398 GNC       47754 :                     result = (Node *) l;
     399                 :                 }
     400         1911929 :                 else if (tok_len == 1 && token[0] == 'x')
     401                 :                 {
     402                 :                     /* List of TransactionIds */
     403                 :                     for (;;)
     404 UNC           0 :                     {
     405                 :                         TransactionId val;
     406                 :                         char       *endptr;
     407                 : 
     408               0 :                         token = pg_strtok(&tok_len);
     409               0 :                         if (token == NULL)
     410               0 :                             elog(ERROR, "unterminated List structure");
     411               0 :                         if (token[0] == ')')
     412               0 :                             break;
     413               0 :                         val = (TransactionId) strtoul(token, &endptr, 10);
     414               0 :                         if (endptr != token + tok_len)
     415               0 :                             elog(ERROR, "unrecognized Xid: \"%.*s\"",
     416                 :                                  tok_len, token);
     417               0 :                         l = lappend_xid(l, val);
     418                 :                     }
     419               0 :                     result = (Node *) l;
     420                 :                 }
     421 GNC     1911929 :                 else if (tok_len == 1 && token[0] == 'b')
     422 UNC           0 :                 {
     423                 :                     /* Bitmapset -- see also _readBitmapset() */
     424               0 :                     Bitmapset  *bms = NULL;
     425                 : 
     426                 :                     for (;;)
     427               0 :                     {
     428                 :                         int         val;
     429                 :                         char       *endptr;
     430                 : 
     431               0 :                         token = pg_strtok(&tok_len);
     432               0 :                         if (token == NULL)
     433               0 :                             elog(ERROR, "unterminated Bitmapset structure");
     434               0 :                         if (tok_len == 1 && token[0] == ')')
     435               0 :                             break;
     436               0 :                         val = (int) strtol(token, &endptr, 10);
     437               0 :                         if (endptr != token + tok_len)
     438               0 :                             elog(ERROR, "unrecognized integer: \"%.*s\"",
     439                 :                                  tok_len, token);
     440               0 :                         bms = bms_add_member(bms, val);
     441                 :                     }
     442               0 :                     result = (Node *) bms;
     443 EUB             :                 }
     444                 :                 else
     445 ECB             :                 {
     446                 :                     /* List of other node types */
     447                 :                     for (;;)
     448                 :                     {
     449                 :                         /* We have already scanned next token... */
     450 GIC    15186969 :                         if (token[0] == ')')
     451         1911929 :                             break;
     452        13275040 :                         l = lappend(l, nodeRead(token, tok_len));
     453 GBC    13275040 :                         token = pg_strtok(&tok_len);
     454 GIC    13275040 :                         if (token == NULL)
     455 UIC           0 :                             elog(ERROR, "unterminated List structure");
     456                 :                     }
     457 GNC     1911929 :                     result = (Node *) l;
     458 EUB             :                 }
     459 GBC     2103298 :                 break;
     460 EUB             :             }
     461 UBC           0 :         case RIGHT_PAREN:
     462               0 :             elog(ERROR, "unexpected right parenthesis");
     463 EUB             :             result = NULL;      /* keep compiler happy */
     464                 :             break;
     465 GIC     4000564 :         case OTHER_TOKEN:
     466 GBC     4000564 :             if (tok_len == 0)
     467                 :             {
     468 EUB             :                 /* must be "<>" --- represents a null pointer */
     469 GIC     4000564 :                 result = NULL;
     470 ECB             :             }
     471 EUB             :             else
     472                 :             {
     473 UBC           0 :                 elog(ERROR, "unrecognized token: \"%.*s\"", tok_len, token);
     474                 :                 result = NULL;  /* keep compiler happy */
     475                 :             }
     476 GBC     4000564 :             break;
     477 UIC           0 :         case T_Integer:
     478                 : 
     479                 :             /*
     480 EUB             :              * we know that the token terminates on a char atoi will stop at
     481                 :              */
     482 UBC           0 :             result = (Node *) makeInteger(atoi(token));
     483               0 :             break;
     484               0 :         case T_Float:
     485 EUB             :             {
     486 UBC           0 :                 char       *fval = (char *) palloc(tok_len + 1);
     487 EUB             : 
     488 UIC           0 :                 memcpy(fval, token, tok_len);
     489 UBC           0 :                 fval[tok_len] = '\0';
     490 UIC           0 :                 result = (Node *) makeFloat(fval);
     491 EUB             :             }
     492 UIC           0 :             break;
     493               0 :         case T_Boolean:
     494               0 :             result = (Node *) makeBoolean(token[0] == 't');
     495               0 :             break;
     496 GIC     6536021 :         case T_String:
     497                 :             /* need to remove leading and trailing quotes, and backslashes */
     498         6536021 :             result = (Node *) makeString(debackslash(token + 1, tok_len - 2));
     499 CBC     6536021 :             break;
     500 LBC           0 :         case T_BitString:
     501 ECB             :             {
     502 UNC           0 :                 char       *val = palloc(tok_len + 1);
     503 ECB             : 
     504 UNC           0 :                 memcpy(val, token, tok_len);
     505               0 :                 val[tok_len] = '\0';
     506 UIC           0 :                 result = (Node *) makeBitString(val);
     507 LBC           0 :                 break;
     508                 :             }
     509 UBC           0 :         default:
     510               0 :             elog(ERROR, "unrecognized node type: %d", (int) type);
     511                 :             result = NULL;      /* keep compiler happy */
     512                 :             break;
     513 ECB             :     }
     514                 : 
     515 GIC    22146526 :     return (void *) result;
     516                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a