LCOV - differential code coverage report
Current view: top level - src/fe_utils - mbprint.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 76.7 % 176 135 41 135
Current Date: 2023-04-08 17:13:01 Functions: 87.5 % 8 7 1 7
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 76.7 % 176 135 41 135
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 87.5 % 8 7 1 7

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * Multibyte character printing support for frontend code
                                  4                 :  *
                                  5                 :  *
                                  6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  7                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :  *
                                  9                 :  * src/fe_utils/mbprint.c
                                 10                 :  *
                                 11                 :  *-------------------------------------------------------------------------
                                 12                 :  */
                                 13                 : #include "postgres_fe.h"
                                 14                 : 
                                 15                 : #include "fe_utils/mbprint.h"
                                 16                 : 
                                 17                 : #include "libpq-fe.h"
                                 18                 : 
                                 19                 : 
                                 20                 : /*
                                 21                 :  * To avoid version-skew problems, this file must not use declarations
                                 22                 :  * from pg_wchar.h: the encoding IDs we are dealing with are determined
                                 23                 :  * by the libpq.so we are linked with, and that might not match the
                                 24                 :  * numbers we see at compile time.  (If this file were inside libpq,
                                 25                 :  * the problem would go away...)
                                 26                 :  *
                                 27                 :  * Hence, we have our own definition of pg_wchar, and we get the values
                                 28                 :  * of any needed encoding IDs on-the-fly.
                                 29                 :  */
                                 30                 : 
                                 31                 : typedef unsigned int pg_wchar;
                                 32                 : 
                                 33                 : static int
 4926 tgl                        34 CBC     4605072 : pg_get_utf8_id(void)
                                 35                 : {
                                 36                 :     static int  utf8_id = -1;
                                 37                 : 
 5657                            38         4605072 :     if (utf8_id < 0)
                                 39            3991 :         utf8_id = pg_char_to_encoding("utf8");
                                 40         4605072 :     return utf8_id;
                                 41                 : }
                                 42                 : 
                                 43                 : #define PG_UTF8     pg_get_utf8_id()
                                 44                 : 
                                 45                 : 
                                 46                 : /*
                                 47                 :  * Convert a UTF-8 character to a Unicode code point.
                                 48                 :  * This is a one-character version of pg_utf2wchar_with_len.
                                 49                 :  *
                                 50                 :  * No error checks here, c must point to a long-enough string.
                                 51                 :  */
                                 52                 : static pg_wchar
 4617 tgl                        53 UBC           0 : utf8_to_unicode(const unsigned char *c)
                                 54                 : {
 7836 bruce                      55               0 :     if ((*c & 0x80) == 0)
                                 56               0 :         return (pg_wchar) c[0];
                                 57               0 :     else if ((*c & 0xe0) == 0xc0)
                                 58               0 :         return (pg_wchar) (((c[0] & 0x1f) << 6) |
                                 59               0 :                            (c[1] & 0x3f));
                                 60               0 :     else if ((*c & 0xf0) == 0xe0)
                                 61               0 :         return (pg_wchar) (((c[0] & 0x0f) << 12) |
                                 62               0 :                            ((c[1] & 0x3f) << 6) |
                                 63               0 :                            (c[2] & 0x3f));
 4619 tgl                        64               0 :     else if ((*c & 0xf8) == 0xf0)
 7836 bruce                      65               0 :         return (pg_wchar) (((c[0] & 0x07) << 18) |
                                 66               0 :                            ((c[1] & 0x3f) << 12) |
                                 67               0 :                            ((c[2] & 0x3f) << 6) |
                                 68               0 :                            (c[3] & 0x3f));
                                 69                 :     else
                                 70                 :         /* that is an invalid code on purpose */
 7846 ishii                      71               0 :         return 0xffffffff;
                                 72                 : }
                                 73                 : 
                                 74                 : 
                                 75                 : /*
                                 76                 :  * Unicode 3.1 compliant validation : for each category, it checks the
                                 77                 :  * combination of each byte to make sure it maps to a valid range. It also
                                 78                 :  * returns -1 for the following UCS values: ucs > 0x10ffff ucs & 0xfffe =
                                 79                 :  * 0xfffe 0xfdd0 < ucs < 0xfdef ucs & 0xdb00 = 0xd800 (surrogates)
                                 80                 :  */
                                 81                 : static int
 7846 ishii                      82 CBC    11166761 : utf_charcheck(const unsigned char *c)
                                 83                 : {
 7836 bruce                      84        11166761 :     if ((*c & 0x80) == 0)
 7846 ishii                      85        11166081 :         return 1;
 7836 bruce                      86             680 :     else if ((*c & 0xe0) == 0xc0)
                                 87                 :     {
                                 88                 :         /* two-byte char */
                                 89             641 :         if (((c[1] & 0xc0) == 0x80) && ((c[0] & 0x1f) > 0x01))
 7846 ishii                      90             641 :             return 2;
 7846 ishii                      91 UBC           0 :         return -1;
                                 92                 :     }
 7836 bruce                      93 CBC          39 :     else if ((*c & 0xf0) == 0xe0)
                                 94                 :     {
                                 95                 :         /* three-byte char */
 7846 ishii                      96              27 :         if (((c[1] & 0xc0) == 0x80) &&
                                 97              27 :             (((c[0] & 0x0f) != 0x00) || ((c[1] & 0x20) == 0x20)) &&
 7836 bruce                      98              27 :             ((c[2] & 0xc0) == 0x80))
                                 99                 :         {
                                100              27 :             int         z = c[0] & 0x0f;
                                101              27 :             int         yx = ((c[1] & 0x3f) << 6) | (c[0] & 0x3f);
                                102              27 :             int         lx = yx & 0x7f;
                                103                 : 
                                104                 :             /* check 0xfffe/0xffff, 0xfdd0..0xfedf range, surrogates */
 7846 ishii                     105              27 :             if (((z == 0x0f) &&
 7846 ishii                     106 UBC           0 :                  (((yx & 0xffe) == 0xffe) ||
 2118 tgl                       107 CBC          27 :                   (((yx & 0xf80) == 0xd80) && (lx >= 0x30) && (lx <= 0x4f)))) ||
 7836 bruce                     108 UBC           0 :                 ((z == 0x0d) && ((yx & 0xb00) == 0x800)))
 7846 ishii                     109               0 :                 return -1;
 7846 ishii                     110 CBC          27 :             return 3;
                                111                 :         }
 7846 ishii                     112 UBC           0 :         return -1;
                                113                 :     }
 7836 bruce                     114 CBC          12 :     else if ((*c & 0xf8) == 0xf0)
                                115                 :     {
                                116              12 :         int         u = ((c[0] & 0x07) << 2) | ((c[1] & 0x30) >> 4);
                                117                 : 
                                118                 :         /* four-byte char */
 7846 ishii                     119              12 :         if (((c[1] & 0xc0) == 0x80) &&
                                120              12 :             (u > 0x00) && (u <= 0x10) &&
 7836 bruce                     121              12 :             ((c[2] & 0xc0) == 0x80) && ((c[3] & 0xc0) == 0x80))
                                122                 :         {
                                123                 :             /* test for 0xzzzzfffe/0xzzzzfffff */
 7846 ishii                     124              12 :             if (((c[1] & 0x0f) == 0x0f) && ((c[2] & 0x3f) == 0x3f) &&
 7836 bruce                     125 UBC           0 :                 ((c[3] & 0x3e) == 0x3e))
 7846 ishii                     126               0 :                 return -1;
 7846 ishii                     127 CBC          12 :             return 4;
                                128                 :         }
 7846 ishii                     129 UBC           0 :         return -1;
                                130                 :     }
                                131               0 :     return -1;
                                132                 : }
                                133                 : 
                                134                 : 
                                135                 : static void
 7846 ishii                     136 CBC     4603897 : mb_utf_validate(unsigned char *pwcs)
                                137                 : {
                                138         4603897 :     unsigned char *p = pwcs;
                                139                 : 
 7836 bruce                     140        15770658 :     while (*pwcs)
                                141                 :     {
                                142                 :         int         len;
                                143                 : 
 6267                           144        11166761 :         if ((len = utf_charcheck(pwcs)) > 0)
                                145                 :         {
 7836                           146        11166761 :             if (p != pwcs)
                                147                 :             {
                                148                 :                 int         i;
                                149                 : 
 6267 bruce                     150 UBC           0 :                 for (i = 0; i < len; i++)
 7846 ishii                     151               0 :                     *p++ = *pwcs++;
                                152                 :             }
                                153                 :             else
                                154                 :             {
 6267 bruce                     155 CBC    11166761 :                 pwcs += len;
                                156        11166761 :                 p += len;
                                157                 :             }
                                158                 :         }
                                159                 :         else
                                160                 :             /* we skip the char */
 7846 ishii                     161 UBC           0 :             pwcs++;
                                162                 :     }
 7836 bruce                     163 CBC     4603897 :     if (p != pwcs)
 7846 ishii                     164 UBC           0 :         *p = '\0';
 7846 ishii                     165 CBC     4603897 : }
                                166                 : 
                                167                 : /*
                                168                 :  * public functions : wcswidth and mbvalidate
                                169                 :  */
                                170                 : 
                                171                 : /*
                                172                 :  * pg_wcswidth is the dumb display-width function.
                                173                 :  * It assumes that everything will appear on one line.
                                174                 :  * OTOH it is easier to use than pg_wcssize if this applies to you.
                                175                 :  */
                                176                 : int
 4050 tgl                       177            1607 : pg_wcswidth(const char *pwcs, size_t len, int encoding)
                                178                 : {
 6031 bruce                     179            1607 :     int         width = 0;
                                180                 : 
 6267                           181           16033 :     while (len > 0)
                                182                 :     {
                                183                 :         int         chlen,
                                184                 :                     chwidth;
                                185                 : 
 4050 tgl                       186           14426 :         chlen = PQmblen(pwcs, encoding);
                                187           14426 :         if (len < (size_t) chlen)
 6031 bruce                     188 UBC           0 :             break;              /* Invalid string */
                                189                 : 
 4050 tgl                       190 CBC       14426 :         chwidth = PQdsplen(pwcs, encoding);
 6267 bruce                     191           14426 :         if (chwidth > 0)
                                192           14426 :             width += chwidth;
                                193                 : 
                                194           14426 :         pwcs += chlen;
 4050 tgl                       195           14426 :         len -= chlen;
                                196                 :     }
 6267 bruce                     197            1607 :     return width;
                                198                 : }
                                199                 : 
                                200                 : /*
                                201                 :  * pg_wcssize takes the given string in the given encoding and returns three
                                202                 :  * values:
                                203                 :  *    result_width: Width in display characters of the longest line in string
                                204                 :  *    result_height: Number of lines in display output
                                205                 :  *    result_format_size: Number of bytes required to store formatted
                                206                 :  *      representation of string
                                207                 :  *
                                208                 :  * This MUST be kept in sync with pg_wcsformat!
                                209                 :  */
                                210                 : void
 4228 peter_e                   211         1084666 : pg_wcssize(const unsigned char *pwcs, size_t len, int encoding,
                                212                 :            int *result_width, int *result_height, int *result_format_size)
                                213                 : {
                                214                 :     int         w,
 6031 bruce                     215         1084666 :                 chlen = 0,
                                216         1084666 :                 linewidth = 0;
                                217         1084666 :     int         width = 0;
                                218         1084666 :     int         height = 1;
                                219         1084666 :     int         format_size = 0;
                                220                 : 
 6267                           221        13050549 :     for (; *pwcs && len > 0; pwcs += chlen)
                                222                 :     {
 4058 peter_e                   223        11965883 :         chlen = PQmblen((const char *) pwcs, encoding);
 6031 bruce                     224        11965883 :         if (len < (size_t) chlen)
 6267 bruce                     225 UBC           0 :             break;
 4058 peter_e                   226 CBC    11965883 :         w = PQdsplen((const char *) pwcs, encoding);
                                227                 : 
 5947 tgl                       228        11965883 :         if (chlen == 1)         /* single-byte char */
                                229                 :         {
 6031 bruce                     230        11964523 :             if (*pwcs == '\n')  /* Newline */
                                231                 :             {
 6267                           232           17929 :                 if (linewidth > width)
                                233            4324 :                     width = linewidth;
                                234           17929 :                 linewidth = 0;
                                235           17929 :                 height += 1;
 2118 tgl                       236           17929 :                 format_size += 1;   /* For NUL char */
                                237                 :             }
                                238        11946594 :             else if (*pwcs == '\r') /* Linefeed */
                                239                 :             {
 6267 bruce                     240               8 :                 linewidth += 2;
                                241               8 :                 format_size += 2;
                                242                 :             }
 2118 tgl                       243        11946586 :             else if (*pwcs == '\t') /* Tab */
                                244                 :             {
                                245                 :                 do
                                246                 :                 {
 5448                           247            1470 :                     linewidth++;
                                248            1470 :                     format_size++;
                                249            1470 :                 } while (linewidth % 8 != 0);
                                250                 :             }
 5947                           251        11946398 :             else if (w < 0)      /* Other control char */
                                252                 :             {
 6267 bruce                     253              72 :                 linewidth += 4;
                                254              72 :                 format_size += 4;
                                255                 :             }
                                256                 :             else                /* Output it as-is */
                                257                 :             {
 5947 tgl                       258        11946326 :                 linewidth += w;
 6267 bruce                     259        11946326 :                 format_size += 1;
                                260                 :             }
                                261                 :         }
 5947 tgl                       262            1360 :         else if (w < 0)          /* Non-ascii control char */
                                263                 :         {
 6031 bruce                     264 UBC           0 :             linewidth += 6;     /* \u0000 */
 6267                           265               0 :             format_size += 6;
                                266                 :         }
                                267                 :         else                    /* All other chars */
                                268                 :         {
 6267 bruce                     269 CBC        1360 :             linewidth += w;
                                270            1360 :             format_size += chlen;
                                271                 :         }
                                272        11965883 :         len -= chlen;
                                273                 :     }
                                274         1084666 :     if (linewidth > width)
                                275         1008008 :         width = linewidth;
 5050                           276         1084666 :     format_size += 1;           /* For NUL char */
                                277                 : 
                                278                 :     /* Set results */
 6267                           279         1084666 :     if (result_width)
                                280         1084666 :         *result_width = width;
                                281         1084666 :     if (result_height)
                                282         1084666 :         *result_height = height;
                                283         1084666 :     if (result_format_size)
                                284         1082243 :         *result_format_size = format_size;
                                285         1084666 : }
                                286                 : 
                                287                 : /*
                                288                 :  *  Format a string into one or more "struct lineptr" lines.
                                289                 :  *  lines[i].ptr == NULL indicates the end of the array.
                                290                 :  *
                                291                 :  * This MUST be kept in sync with pg_wcssize!
                                292                 :  */
                                293                 : void
 4228 peter_e                   294          597013 : pg_wcsformat(const unsigned char *pwcs, size_t len, int encoding,
                                295                 :              struct lineptr *lines, int count)
                                296                 : {
                                297                 :     int         w,
 6267 bruce                     298          597013 :                 chlen = 0;
 6031                           299          597013 :     int         linewidth = 0;
                                300          597013 :     unsigned char *ptr = lines->ptr; /* Pointer to data area */
                                301                 : 
 6267                           302         6954694 :     for (; *pwcs && len > 0; pwcs += chlen)
                                303                 :     {
 4058 peter_e                   304         6357681 :         chlen = PQmblen((const char *) pwcs, encoding);
 6031 bruce                     305         6357681 :         if (len < (size_t) chlen)
 6267 bruce                     306 UBC           0 :             break;
 4058 peter_e                   307 CBC     6357681 :         w = PQdsplen((const char *) pwcs, encoding);
                                308                 : 
 5947 tgl                       309         6357681 :         if (chlen == 1)         /* single-byte char */
                                310                 :         {
 6031 bruce                     311         6357001 :             if (*pwcs == '\n')  /* Newline */
                                312                 :             {
 5947 tgl                       313            9581 :                 *ptr++ = '\0';
 6267 bruce                     314            9581 :                 lines->width = linewidth;
                                315            9581 :                 linewidth = 0;
                                316            9581 :                 lines++;
                                317            9581 :                 count--;
 5448 tgl                       318            9581 :                 if (count <= 0)
 6031 bruce                     319 UBC           0 :                     exit(1);    /* Screwup */
                                320                 : 
                                321                 :                 /* make next line point to remaining memory */
 6267 bruce                     322 CBC        9581 :                 lines->ptr = ptr;
                                323                 :             }
 2118 tgl                       324         6347420 :             else if (*pwcs == '\r') /* Linefeed */
                                325                 :             {
 6267                           326               4 :                 strcpy((char *) ptr, "\\r");
      bruce                     327               4 :                 linewidth += 2;
                                328               4 :                 ptr += 2;
                                329                 :             }
 2118 tgl                       330         6347416 :             else if (*pwcs == '\t') /* Tab */
                                331                 :             {
                                332                 :                 do
                                333                 :                 {
 5449 bruce                     334             735 :                     *ptr++ = ' ';
                                335             735 :                     linewidth++;
                                336             735 :                 } while (linewidth % 8 != 0);
                                337                 :             }
 5947 tgl                       338         6347322 :             else if (w < 0)      /* Other control char */
                                339                 :             {
 6267                           340              36 :                 sprintf((char *) ptr, "\\x%02X", *pwcs);
      bruce                     341              36 :                 linewidth += 4;
                                342              36 :                 ptr += 4;
                                343                 :             }
                                344                 :             else                /* Output it as-is */
                                345                 :             {
 5947 tgl                       346         6347286 :                 linewidth += w;
 6267 bruce                     347         6347286 :                 *ptr++ = *pwcs;
                                348                 :             }
                                349                 :         }
 5947 tgl                       350             680 :         else if (w < 0)          /* Non-ascii control char */
                                351                 :         {
 6267 bruce                     352 UBC           0 :             if (encoding == PG_UTF8)
 4617 tgl                       353               0 :                 sprintf((char *) ptr, "\\u%04X", utf8_to_unicode(pwcs));
                                354                 :             else
                                355                 :             {
                                356                 :                 /*
                                357                 :                  * This case cannot happen in the current code because only
                                358                 :                  * UTF-8 signals multibyte control characters. But we may need
                                359                 :                  * to support it at some stage
                                360                 :                  */
 6267                           361               0 :                 sprintf((char *) ptr, "\\u????");
                                362                 :             }
      bruce                     363               0 :             ptr += 6;
                                364               0 :             linewidth += 6;
                                365                 :         }
                                366                 :         else                    /* All other chars */
                                367                 :         {
                                368                 :             int         i;
                                369                 : 
 6031 bruce                     370 CBC        2091 :             for (i = 0; i < chlen; i++)
 6267                           371            1411 :                 *ptr++ = pwcs[i];
                                372             680 :             linewidth += w;
                                373                 :         }
                                374         6357681 :         len -= chlen;
                                375                 :     }
                                376          597013 :     lines->width = linewidth;
 5050                           377          597013 :     *ptr++ = '\0';              /* Terminate formatted string */
                                378                 : 
 5448 tgl                       379          597013 :     if (count <= 0)
 5050 bruce                     380 UBC           0 :         exit(1);                /* Screwup */
                                381                 : 
 5050 bruce                     382 CBC      597013 :     (lines + 1)->ptr = NULL; /* terminate line array */
 7846 ishii                     383          597013 : }
                                384                 : 
                                385                 : 
                                386                 : /*
                                387                 :  * Encoding validation: delete any unvalidatable characters from the string
                                388                 :  *
                                389                 :  * This seems redundant with existing functionality elsewhere?
                                390                 :  */
                                391                 : unsigned char *
 6267 bruce                     392         4605072 : mbvalidate(unsigned char *pwcs, int encoding)
                                393                 : {
 7327 peter_e                   394         4605072 :     if (encoding == PG_UTF8)
 4041                           395         4603897 :         mb_utf_validate(pwcs);
                                396                 :     else
                                397                 :     {
                                398                 :         /*
                                399                 :          * other encodings needing validation should add their own routines
                                400                 :          * here
                                401                 :          */
                                402                 :     }
                                403                 : 
 6406 tgl                       404         4605072 :     return pwcs;
                                405                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a