LCOV - differential code coverage report
Current view: top level - src/backend/parser - scansup.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 96.7 % 30 29 1 29
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 4 4 4
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * scansup.c
       4                 :  *    scanner support routines used by the core lexer
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/parser/scansup.c
      12                 :  *
      13                 :  *-------------------------------------------------------------------------
      14                 :  */
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include <ctype.h>
      18                 : 
      19                 : #include "mb/pg_wchar.h"
      20                 : #include "parser/scansup.h"
      21                 : 
      22                 : 
      23                 : /*
      24                 :  * downcase_truncate_identifier() --- do appropriate downcasing and
      25                 :  * truncation of an unquoted identifier.  Optionally warn of truncation.
      26                 :  *
      27                 :  * Returns a palloc'd string containing the adjusted identifier.
      28                 :  *
      29                 :  * Note: in some usages the passed string is not null-terminated.
      30                 :  *
      31                 :  * Note: the API of this function is designed to allow for downcasing
      32                 :  * transformations that increase the string length, but we don't yet
      33                 :  * support that.  If you want to implement it, you'll need to fix
      34                 :  * SplitIdentifierString() in utils/adt/varlena.c.
      35                 :  */
      36                 : char *
      37 CBC     5146715 : downcase_truncate_identifier(const char *ident, int len, bool warn)
      38                 : {
      39         5146715 :     return downcase_identifier(ident, len, warn, true);
      40                 : }
      41                 : 
      42                 : /*
      43                 :  * a workhorse for downcase_truncate_identifier
      44                 :  */
      45                 : char *
      46         5146766 : downcase_identifier(const char *ident, int len, bool warn, bool truncate)
      47                 : {
      48                 :     char       *result;
      49                 :     int         i;
      50                 :     bool        enc_is_single_byte;
      51                 : 
      52         5146766 :     result = palloc(len + 1);
      53         5146766 :     enc_is_single_byte = pg_database_encoding_max_length() == 1;
      54                 : 
      55                 :     /*
      56                 :      * SQL99 specifies Unicode-aware case normalization, which we don't yet
      57                 :      * have the infrastructure for.  Instead we use tolower() to provide a
      58                 :      * locale-aware translation.  However, there are some locales where this
      59                 :      * is not right either (eg, Turkish may do strange things with 'i' and
      60                 :      * 'I').  Our current compromise is to use tolower() for characters with
      61                 :      * the high bit set, as long as they aren't part of a multi-byte
      62                 :      * character, and use an ASCII-only downcasing for 7-bit characters.
      63                 :      */
      64        44713542 :     for (i = 0; i < len; i++)
      65                 :     {
      66        39566776 :         unsigned char ch = (unsigned char) ident[i];
      67                 : 
      68        39566776 :         if (ch >= 'A' && ch <= 'Z')
      69          907543 :             ch += 'a' - 'A';
      70        38659233 :         else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
      71 UBC           0 :             ch = tolower(ch);
      72 CBC    39566776 :         result[i] = (char) ch;
      73                 :     }
      74         5146766 :     result[i] = '\0';
      75                 : 
      76         5146766 :     if (i >= NAMEDATALEN && truncate)
      77               6 :         truncate_identifier(result, i, warn);
      78                 : 
      79         5146766 :     return result;
      80                 : }
      81                 : 
      82                 : 
      83                 : /*
      84                 :  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
      85                 :  *
      86                 :  * The given string is modified in-place, if necessary.  A warning is
      87                 :  * issued if requested.
      88                 :  *
      89                 :  * We require the caller to pass in the string length since this saves a
      90                 :  * strlen() call in some common usages.
      91                 :  */
      92                 : void
      93          147100 : truncate_identifier(char *ident, int len, bool warn)
      94                 : {
      95          147100 :     if (len >= NAMEDATALEN)
      96                 :     {
      97               7 :         len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
      98               7 :         if (warn)
      99               7 :             ereport(NOTICE,
     100                 :                     (errcode(ERRCODE_NAME_TOO_LONG),
     101                 :                      errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
     102                 :                             ident, len, ident)));
     103               7 :         ident[len] = '\0';
     104                 :     }
     105          147100 : }
     106                 : 
     107                 : /*
     108                 :  * scanner_isspace() --- return true if flex scanner considers char whitespace
     109                 :  *
     110                 :  * This should be used instead of the potentially locale-dependent isspace()
     111                 :  * function when it's important to match the lexer's behavior.
     112                 :  *
     113                 :  * In principle we might need similar functions for isalnum etc, but for the
     114                 :  * moment only isspace seems needed.
     115                 :  */
     116                 : bool
     117         1052917 : scanner_isspace(char ch)
     118                 : {
     119                 :     /* This must match scan.l's list of {space} characters */
     120         1052917 :     if (ch == ' ' ||
     121         1023924 :         ch == '\t' ||
     122         1023524 :         ch == '\n' ||
     123         1023521 :         ch == '\r' ||
     124                 :         ch == '\f')
     125           29396 :         return true;
     126         1023521 :     return false;
     127                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a