LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - ascii.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 17.9 % 56 10 1 29 16 1 7 2 29 7
Current Date: 2023-04-08 15:15:32 Functions: 16.7 % 6 1 5 1 5 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-----------------------------------------------------------------------
       2                 :  * ascii.c
       3                 :  *   The PostgreSQL routine for string to ascii conversion.
       4                 :  *
       5                 :  *   Portions Copyright (c) 1999-2023, PostgreSQL Global Development Group
       6                 :  *
       7                 :  * IDENTIFICATION
       8                 :  *    src/backend/utils/adt/ascii.c
       9                 :  *
      10                 :  *-----------------------------------------------------------------------
      11                 :  */
      12                 : #include "postgres.h"
      13                 : 
      14                 : #include "mb/pg_wchar.h"
      15                 : #include "utils/ascii.h"
      16                 : #include "utils/builtins.h"
      17                 : #include "varatt.h"
      18                 : 
      19                 : static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
      20                 :                         unsigned char *dest, int enc);  /* byte-wise translation, defined below */
      21                 : static text *encode_to_ascii(text *data, int enc);  /* in-place wrapper around pg_to_ascii() */
      22                 : 
      23                 : 
      24                 : /* ----------
      25                 :  * pg_to_ascii
                         :  *
                         :  * Translate the bytes in [src, src_end) to ASCII and store them at dest,
                         :  * writing exactly one output byte per input byte.  Bytes below 128 are
                         :  * copied unchanged, bytes from 128 up to the start of the encoding's
                         :  * table become ' ', and all remaining bytes are looked up in a
                         :  * per-encoding replacement table.
                         :  *
                         :  * Only PG_LATIN1, PG_LATIN2, PG_LATIN9 and PG_WIN1250 are handled; any
                         :  * other 'enc' is reported via ereport(ERROR) with
                         :  * ERRCODE_FEATURE_NOT_SUPPORTED.
      26                 :  * ----------
      27                 :  */
      28                 : static void
      29 UIC           0 : pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
      30 EUB             : {
      31                 :     unsigned char *x;
      32                 :     const unsigned char *ascii;
      33                 :     int         range;
      34                 : 
      35                 :     /*
      36                 :      * first byte value covered by an encoding's replacement table
      37                 :      */
      38                 : #define RANGE_128   128
      39                 : #define RANGE_160   160
      40                 : 
      41 UIC           0 :     if (enc == PG_LATIN1)
      42 EUB             :     {
      43                 :         /*
      44                 :          * ISO-8859-1 <range: 160 -- 255>
      45                 :          */
      46 UIC           0 :         ascii = (const unsigned char *) "  cL Y  \"Ca  -R     'u .,      ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
      47 UBC           0 :         range = RANGE_160;
      48 EUB             :     }
      49 UIC           0 :     else if (enc == PG_LATIN2)
      50 EUB             :     {
      51                 :         /*
      52                 :          * ISO-8859-2 <range: 160 -- 255>
      53                 :          */
      54 UIC           0 :         ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
      55 UBC           0 :         range = RANGE_160;
      56 EUB             :     }
      57 UIC           0 :     else if (enc == PG_LATIN9)
      58 EUB             :     {
      59                 :         /*
      60                 :          * ISO-8859-15 <range: 160 -- 255>
      61                 :          */
      62 UIC           0 :         ascii = (const unsigned char *) "  cL YS sCa  -R     Zu .z   EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
      63 UBC           0 :         range = RANGE_160;
      64 EUB             :     }
      65 UIC           0 :     else if (enc == PG_WIN1250)
      66 EUB             :     {
      67                 :         /*
      68                 :          * Windows CP1250 <range: 128 -- 255>
      69                 :          */
      70 UIC           0 :         ascii = (const unsigned char *) "  ' \"    %S<STZZ `'\"\".--  s>stzz   L A  \"CS  -RZ  ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
      71 UBC           0 :         range = RANGE_128;
      72 EUB             :     }
      73                 :     else
      74                 :     {
      75 UIC           0 :         ereport(ERROR,
      76 EUB             :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
      77                 :                  errmsg("encoding conversion from %s to ASCII not supported",
      78                 :                         pg_encoding_to_char(enc))));
      79                 :         return;                 /* keep compiler quiet */
      80                 :     }
      81                 : 
      82                 :     /*
      83                 :      * Encode: one output byte is stored at dest for every input byte
      84                 :      */
      85 UIC           0 :     for (x = src; x < src_end; x++)
      86 EUB             :     {
      87 UIC           0 :         if (*x < 128)           /* already ASCII: copy through */
      88 UBC           0 :             *dest++ = *x;
      89               0 :         else if (*x < range)
      90               0 :             *dest++ = ' ';      /* 128 .. range-1 has no table entry */
      91 EUB             :         else
      92 UIC           0 :             *dest++ = ascii[*x - range];    /* table is indexed from 'range' */
      93 EUB             :     }
      94                 : }
      95                 : 
      96                 : /* ----------
      97                 :  * encode_to_ascii - convert a text value to ASCII for encoding 'enc'
      98                 :  *
      99                 :  * The text datum is overwritten in-place, therefore this coding method
     100                 :  * cannot support conversions that change the string length!
                         :  *
                         :  * The same buffer (VARDATA(data)) is handed to pg_to_ascii() as both
                         :  * source and destination; the end pointer is the start of the datum
                         :  * plus VARSIZE(data).  Returns the 'data' pointer that was passed in.
     101                 :  * ----------
     102                 :  */
     103                 : static text *
     104 UIC           0 : encode_to_ascii(text *data, int enc)
     105 EUB             : {
     106 UIC           0 :     pg_to_ascii((unsigned char *) VARDATA(data),    /* src */
     107 UBC           0 :                 (unsigned char *) (data) + VARSIZE(data),   /* src end */
     108               0 :                 (unsigned char *) VARDATA(data),    /* dest: same buffer as src */
     109 EUB             :                 enc);           /* encoding */
     110                 : 
     111 UIC           0 :     return data;
     112 EUB             : }
     113                 : 
     114                 : /* ----------
     115                 :  * to_ascii_encname - convert to ASCII; the encoding is given by name.
                         :  *
                         :  * Argument 0 is the text to convert (fetched with
                         :  * PG_GETARG_TEXT_P_COPY, since encode_to_ascii() overwrites its input);
                         :  * argument 1 is the encoding name.  If pg_char_to_encoding() returns a
                         :  * negative value for that name, an ERRCODE_UNDEFINED_OBJECT error is
                         :  * raised.
     116                 :  * ----------
     117                 :  */
     118                 : Datum
     119 UIC           0 : to_ascii_encname(PG_FUNCTION_ARGS)
     120 EUB             : {
     121 UIC           0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);
     122 UBC           0 :     char       *encname = NameStr(*PG_GETARG_NAME(1));
     123               0 :     int         enc = pg_char_to_encoding(encname);
     124 EUB             : 
     125 UIC           0 :     if (enc < 0)                /* name not recognized */
     126 UBC           0 :         ereport(ERROR,
     127 EUB             :                 (errcode(ERRCODE_UNDEFINED_OBJECT),
     128                 :                  errmsg("%s is not a valid encoding name", encname)));
     129                 : 
     130 UIC           0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     131 EUB             : }
     132                 : 
     133                 : /* ----------
     134                 :  * to_ascii_enc - convert to ASCII; the encoding is given as an int4 code.
                         :  *
                         :  * Argument 0 is the text to convert (fetched with
                         :  * PG_GETARG_TEXT_P_COPY, since encode_to_ascii() overwrites its input);
                         :  * argument 1 is the encoding code.  A code that fails
                         :  * PG_VALID_ENCODING() raises an ERRCODE_UNDEFINED_OBJECT error.
     135                 :  * ----------
     136                 :  */
     137                 : Datum
     138 UIC           0 : to_ascii_enc(PG_FUNCTION_ARGS)
     139 EUB             : {
     140 UIC           0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);
     141 UBC           0 :     int         enc = PG_GETARG_INT32(1);
     142 EUB             : 
     143 UIC           0 :     if (!PG_VALID_ENCODING(enc))    /* reject unknown encoding codes */
     144 UBC           0 :         ereport(ERROR,
     145 EUB             :                 (errcode(ERRCODE_UNDEFINED_OBJECT),
     146                 :                  errmsg("%d is not a valid encoding code", enc)));
     147                 : 
     148 UIC           0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     149 EUB             : }
     150                 : 
     151                 : /* ----------
     152                 :  * to_ascii_default - convert to ASCII using the encoding returned by
                         :  * GetDatabaseEncoding().
                         :  *
                         :  * Argument 0 is the text to convert (fetched with
                         :  * PG_GETARG_TEXT_P_COPY, since encode_to_ascii() overwrites its input).
                         :  * No encoding check is made here; unsupported encodings are rejected
                         :  * inside pg_to_ascii().
     153                 :  * ----------
     154                 :  */
     155                 : Datum
     156 UIC           0 : to_ascii_default(PG_FUNCTION_ARGS)
     157 EUB             : {
     158 UIC           0 :     text       *data = PG_GETARG_TEXT_P_COPY(0);
     159 UBC           0 :     int         enc = GetDatabaseEncoding();
     160 EUB             : 
     161 UIC           0 :     PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
     162 EUB             : }
     163                 : 
     164                 : /* ----------
     165                 :  * Copy a string in an arbitrary backend-safe encoding, converting it to a
     166                 :  * valid ASCII string by replacing non-ASCII bytes with '?'.  Otherwise the
     167                 :  * behavior is identical to strlcpy(), except that we don't bother with a
     168                 :  * return value.
                         :  *
                         :  * Control characters other than newline, carriage return and tab are
                         :  * replaced with '?' as well.  At most destsiz bytes are stored at dest,
                         :  * including the terminating nul; if destsiz is 0, nothing is written.
     169                 :  *
     170                 :  * This must not trigger ereport(ERROR), as it is called in postmaster.
     171                 :  * ----------
     172                 :  */
     173                 : void
     174 GIC        6584 : ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
     175 ECB             : {
     176 GIC        6584 :     if (destsiz == 0)           /* corner case: no room for trailing nul */
     177 LBC           0 :         return;
     178 EUB             : 
     179 GIC      138714 :     while (--destsiz > 0)       /* at most destsiz - 1 bytes before the nul */
     180 ECB             :     {
     181                 :         /* use unsigned char here to avoid compiler warning */
     182 GIC      138714 :         unsigned char ch = *src++;
     183 ECB             : 
     184 GIC      138714 :         if (ch == '\0')         /* end of source string */
     185 CBC        6584 :             break;
     186 ECB             :         /* Keep printable ASCII characters; NOTE(review): the upper bound 127 also admits DEL (0x7F) -- confirm intended */
     187 GIC      132130 :         if (32 <= ch && ch <= 127)
     188 CBC      132130 :             *dest = ch;
     189 ECB             :         /* White-space is also OK */
     190 UIC           0 :         else if (ch == '\n' || ch == '\r' || ch == '\t')
     191 UBC           0 :             *dest = ch;
     192 EUB             :         /* Everything else is replaced with '?' */
     193                 :         else
     194 UIC           0 :             *dest = '?';
     195 GBC      132130 :         dest++;
     196 ECB             :     }
     197                 : 
     198 GIC        6584 :     *dest = '\0';               /* always terminate the output */
     199 ECB             : }
        

Generated by: LCOV version v1.16-55-g56c0a2a