LCOV - differential code coverage report
Current view: top level - src/port - chklocale.c (source / functions) Coverage Total Hit LBC UIC GBC GIC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 82.9 % 41 34 1 6 1 23 10 6 23
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 1 1 1 1
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 82.9 % 41 34 1 6 1 23 10 6 23
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 50.0 % 2 1 1 1

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * chklocale.c
                                  4                 :  *      Functions for handling locale-related info
                                  5                 :  *
                                  6                 :  *
                                  7                 :  * Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                  8                 :  *
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/port/chklocale.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : 
                                 16                 : #ifndef FRONTEND
                                 17                 : #include "postgres.h"
                                 18                 : #else
                                 19                 : #include "postgres_fe.h"
                                 20                 : #endif
                                 21                 : 
                                 22                 : #ifdef HAVE_LANGINFO_H
                                 23                 : #include <langinfo.h>
                                 24                 : #endif
                                 25                 : 
                                 26                 : #include "mb/pg_wchar.h"
                                 27                 : 
                                 28                 : 
                                 29                 : /*
                                 30                 :  * This table needs to recognize all the CODESET spellings for supported
                                 31                 :  * backend encodings, as well as frontend-only encodings where possible
                                 32                 :  * (the latter case is currently only needed for initdb to recognize
                                 33                 :  * error situations).  On Windows, we rely on entries for codepage
                                 34                 :  * numbers (CPnnn).
                                 35                 :  *
                                 36                 :  * Note that we search the table with pg_strcasecmp(), so variant
                                 37                 :  * capitalizations don't need their own entries.
                                 38                 :  */
                                 39                 : struct encoding_match
                                 40                 : {
                                 41                 :     enum pg_enc pg_enc_code;
                                 42                 :     const char *system_enc_name;
                                 43                 : };
                                 44                 : 
                                 45                 : static const struct encoding_match encoding_match_list[] = {
                                 46                 :     {PG_EUC_JP, "EUC-JP"},
                                 47                 :     {PG_EUC_JP, "eucJP"},
                                 48                 :     {PG_EUC_JP, "IBM-eucJP"},
                                 49                 :     {PG_EUC_JP, "sdeckanji"},
                                 50                 :     {PG_EUC_JP, "CP20932"},
                                 51                 : 
                                 52                 :     {PG_EUC_CN, "EUC-CN"},
                                 53                 :     {PG_EUC_CN, "eucCN"},
                                 54                 :     {PG_EUC_CN, "IBM-eucCN"},
                                 55                 :     {PG_EUC_CN, "GB2312"},
                                 56                 :     {PG_EUC_CN, "dechanzi"},
                                 57                 :     {PG_EUC_CN, "CP20936"},
                                 58                 : 
                                 59                 :     {PG_EUC_KR, "EUC-KR"},
                                 60                 :     {PG_EUC_KR, "eucKR"},
                                 61                 :     {PG_EUC_KR, "IBM-eucKR"},
                                 62                 :     {PG_EUC_KR, "deckorean"},
                                 63                 :     {PG_EUC_KR, "5601"},
                                 64                 :     {PG_EUC_KR, "CP51949"},
                                 65                 : 
                                 66                 :     {PG_EUC_TW, "EUC-TW"},
                                 67                 :     {PG_EUC_TW, "eucTW"},
                                 68                 :     {PG_EUC_TW, "IBM-eucTW"},
                                 69                 :     {PG_EUC_TW, "cns11643"},
                                 70                 :     /* No codepage for EUC-TW ? */
                                 71                 : 
                                 72                 :     {PG_UTF8, "UTF-8"},
                                 73                 :     {PG_UTF8, "utf8"},
                                 74                 :     {PG_UTF8, "CP65001"},
                                 75                 : 
                                 76                 :     {PG_LATIN1, "ISO-8859-1"},
                                 77                 :     {PG_LATIN1, "ISO8859-1"},
                                 78                 :     {PG_LATIN1, "iso88591"},
                                 79                 :     {PG_LATIN1, "CP28591"},
                                 80                 : 
                                 81                 :     {PG_LATIN2, "ISO-8859-2"},
                                 82                 :     {PG_LATIN2, "ISO8859-2"},
                                 83                 :     {PG_LATIN2, "iso88592"},
                                 84                 :     {PG_LATIN2, "CP28592"},
                                 85                 : 
                                 86                 :     {PG_LATIN3, "ISO-8859-3"},
                                 87                 :     {PG_LATIN3, "ISO8859-3"},
                                 88                 :     {PG_LATIN3, "iso88593"},
                                 89                 :     {PG_LATIN3, "CP28593"},
                                 90                 : 
                                 91                 :     {PG_LATIN4, "ISO-8859-4"},
                                 92                 :     {PG_LATIN4, "ISO8859-4"},
                                 93                 :     {PG_LATIN4, "iso88594"},
                                 94                 :     {PG_LATIN4, "CP28594"},
                                 95                 : 
                                 96                 :     {PG_LATIN5, "ISO-8859-9"},
                                 97                 :     {PG_LATIN5, "ISO8859-9"},
                                 98                 :     {PG_LATIN5, "iso88599"},
                                 99                 :     {PG_LATIN5, "CP28599"},
                                100                 : 
                                101                 :     {PG_LATIN6, "ISO-8859-10"},
                                102                 :     {PG_LATIN6, "ISO8859-10"},
                                103                 :     {PG_LATIN6, "iso885910"},
                                104                 : 
                                105                 :     {PG_LATIN7, "ISO-8859-13"},
                                106                 :     {PG_LATIN7, "ISO8859-13"},
                                107                 :     {PG_LATIN7, "iso885913"},
                                108                 : 
                                109                 :     {PG_LATIN8, "ISO-8859-14"},
                                110                 :     {PG_LATIN8, "ISO8859-14"},
                                111                 :     {PG_LATIN8, "iso885914"},
                                112                 : 
                                113                 :     {PG_LATIN9, "ISO-8859-15"},
                                114                 :     {PG_LATIN9, "ISO8859-15"},
                                115                 :     {PG_LATIN9, "iso885915"},
                                116                 :     {PG_LATIN9, "CP28605"},
                                117                 : 
                                118                 :     {PG_LATIN10, "ISO-8859-16"},
                                119                 :     {PG_LATIN10, "ISO8859-16"},
                                120                 :     {PG_LATIN10, "iso885916"},
                                121                 : 
                                122                 :     {PG_KOI8R, "KOI8-R"},
                                123                 :     {PG_KOI8R, "CP20866"},
                                124                 : 
                                125                 :     {PG_KOI8U, "KOI8-U"},
                                126                 :     {PG_KOI8U, "CP21866"},
                                127                 : 
                                128                 :     {PG_WIN866, "CP866"},
                                129                 :     {PG_WIN874, "CP874"},
                                130                 :     {PG_WIN1250, "CP1250"},
                                131                 :     {PG_WIN1251, "CP1251"},
                                132                 :     {PG_WIN1251, "ansi-1251"},
                                133                 :     {PG_WIN1252, "CP1252"},
                                134                 :     {PG_WIN1253, "CP1253"},
                                135                 :     {PG_WIN1254, "CP1254"},
                                136                 :     {PG_WIN1255, "CP1255"},
                                137                 :     {PG_WIN1256, "CP1256"},
                                138                 :     {PG_WIN1257, "CP1257"},
                                139                 :     {PG_WIN1258, "CP1258"},
                                140                 : 
                                141                 :     {PG_ISO_8859_5, "ISO-8859-5"},
                                142                 :     {PG_ISO_8859_5, "ISO8859-5"},
                                143                 :     {PG_ISO_8859_5, "iso88595"},
                                144                 :     {PG_ISO_8859_5, "CP28595"},
                                145                 : 
                                146                 :     {PG_ISO_8859_6, "ISO-8859-6"},
                                147                 :     {PG_ISO_8859_6, "ISO8859-6"},
                                148                 :     {PG_ISO_8859_6, "iso88596"},
                                149                 :     {PG_ISO_8859_6, "CP28596"},
                                150                 : 
                                151                 :     {PG_ISO_8859_7, "ISO-8859-7"},
                                152                 :     {PG_ISO_8859_7, "ISO8859-7"},
                                153                 :     {PG_ISO_8859_7, "iso88597"},
                                154                 :     {PG_ISO_8859_7, "CP28597"},
                                155                 : 
                                156                 :     {PG_ISO_8859_8, "ISO-8859-8"},
                                157                 :     {PG_ISO_8859_8, "ISO8859-8"},
                                158                 :     {PG_ISO_8859_8, "iso88598"},
                                159                 :     {PG_ISO_8859_8, "CP28598"},
                                160                 : 
                                161                 :     {PG_SJIS, "SJIS"},
                                162                 :     {PG_SJIS, "PCK"},
                                163                 :     {PG_SJIS, "CP932"},
                                164                 :     {PG_SJIS, "SHIFT_JIS"},
                                165                 : 
                                166                 :     {PG_BIG5, "BIG5"},
                                167                 :     {PG_BIG5, "BIG5HKSCS"},
                                168                 :     {PG_BIG5, "Big5-HKSCS"},
                                169                 :     {PG_BIG5, "CP950"},
                                170                 : 
                                171                 :     {PG_GBK, "GBK"},
                                172                 :     {PG_GBK, "CP936"},
                                173                 : 
                                174                 :     {PG_UHC, "UHC"},
                                175                 :     {PG_UHC, "CP949"},
                                176                 : 
                                177                 :     {PG_JOHAB, "JOHAB"},
                                178                 :     {PG_JOHAB, "CP1361"},
                                179                 : 
                                180                 :     {PG_GB18030, "GB18030"},
                                181                 :     {PG_GB18030, "CP54936"},
                                182                 : 
                                183                 :     {PG_SHIFT_JIS_2004, "SJIS_2004"},
                                184                 : 
                                185                 :     {PG_SQL_ASCII, "US-ASCII"},
                                186                 : 
                                187                 :     {PG_SQL_ASCII, NULL}        /* end marker */
                                188                 : };
                                189                 : 
                                190                 : #ifdef WIN32
                                191                 : /*
                                192                 :  * On Windows, use CP<code page number> instead of the nl_langinfo() result
                                193                 :  *
                                194                 :  * This routine uses GetLocaleInfoEx() to parse short locale names like
                                195                 :  * "de-DE", "fr-FR", etc.  If those cannot be parsed correctly process falls
                                196                 :  * back to the pre-VS-2010 manual parsing done with using
                                197                 :  * <Language>_<Country>.<CodePage> as a base.
                                198                 :  *
                                199                 :  * Returns a malloc()'d string for the caller to free.
                                200                 :  */
                                201                 : static char *
                                202                 : win32_langinfo(const char *ctype)
                                203                 : {
                                204                 :     char       *r = NULL;
                                205                 :     char       *codepage;
                                206                 : 
                                207                 : #if defined(_MSC_VER)
                                208                 :     uint32      cp;
                                209                 :     WCHAR       wctype[LOCALE_NAME_MAX_LENGTH];
                                210                 : 
                                211                 :     memset(wctype, 0, sizeof(wctype));
                                212                 :     MultiByteToWideChar(CP_ACP, 0, ctype, -1, wctype, LOCALE_NAME_MAX_LENGTH);
                                213                 : 
                                214                 :     if (GetLocaleInfoEx(wctype,
                                215                 :                         LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
                                216                 :                         (LPWSTR) &cp, sizeof(cp) / sizeof(WCHAR)) > 0)
                                217                 :     {
                                218                 :         r = malloc(16);         /* excess */
                                219                 :         if (r != NULL)
                                220                 :         {
                                221                 :             /*
                                222                 :              * If the return value is CP_ACP that means no ANSI code page is
                                223                 :              * available, so only Unicode can be used for the locale.
                                224                 :              */
                                225                 :             if (cp == CP_ACP)
                                226                 :                 strcpy(r, "utf8");
                                227                 :             else
                                228                 :                 sprintf(r, "CP%u", cp);
                                229                 :         }
                                230                 :     }
                                231                 :     else
                                232                 : #endif
                                233                 :     {
                                234                 :         /*
                                235                 :          * Locale format on Win32 is <Language>_<Country>.<CodePage>.  For
                                236                 :          * example, English_United States.1252.  If we see digits after the
                                237                 :          * last dot, assume it's a codepage number.  Otherwise, we might be
                                238                 :          * dealing with a Unix-style locale string; Windows' setlocale() will
                                239                 :          * take those even though GetLocaleInfoEx() won't, so we end up here.
                                240                 :          * In that case, just return what's after the last dot and hope we can
                                241                 :          * find it in our table.
                                242                 :          */
                                243                 :         codepage = strrchr(ctype, '.');
                                244                 :         if (codepage != NULL)
                                245                 :         {
                                246                 :             size_t      ln;
                                247                 : 
                                248                 :             codepage++;
                                249                 :             ln = strlen(codepage);
                                250                 :             r = malloc(ln + 3);
                                251                 :             if (r != NULL)
                                252                 :             {
                                253                 :                 if (strspn(codepage, "0123456789") == ln)
                                254                 :                     sprintf(r, "CP%s", codepage);
                                255                 :                 else
                                256                 :                     strcpy(r, codepage);
                                257                 :             }
                                258                 :         }
                                259                 :     }
                                260                 : 
                                261                 :     return r;
                                262                 : }
                                263                 : 
                                264                 : #ifndef FRONTEND
                                265                 : /*
                                266                 :  * Given a Windows code page identifier, find the corresponding PostgreSQL
                                267                 :  * encoding.  Issue a warning and return -1 if none found.
                                268                 :  */
                                269                 : int
                                270                 : pg_codepage_to_encoding(UINT cp)
                                271                 : {
                                272                 :     char        sys[16];
                                273                 :     int         i;
                                274                 : 
                                275                 :     sprintf(sys, "CP%u", cp);
                                276                 : 
                                277                 :     /* Check the table */
                                278                 :     for (i = 0; encoding_match_list[i].system_enc_name; i++)
                                279                 :         if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
                                280                 :             return encoding_match_list[i].pg_enc_code;
                                281                 : 
                                282                 :     ereport(WARNING,
 2355 peter_e                   283 ECB             :             (errmsg("could not determine encoding for codeset \"%s\"", sys)));
                                284                 : 
                                285                 :     return -1;
                                286                 : }
                                287                 : #endif
                                288                 : #endif                          /* WIN32 */
 5667 tgl                       289                 : 
                                290                 : #if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
                                291                 : 
                                292                 : /*
                                293                 :  * Given a setting for LC_CTYPE, return the Postgres ID of the associated
                                294                 :  * encoding, if we can determine it.  Return -1 if we can't determine it.
 5672                           295                 :  *
                                296                 :  * Pass in NULL to get the encoding for the current locale setting.
 4896                           297                 :  * Pass "" to get the encoding selected by the server's environment.
                                298                 :  *
 5672                           299                 :  * If the result is PG_SQL_ASCII, callers should treat it as being compatible
 4896                           300                 :  * with any desired encoding.
 3574 noah                      301 EUB             :  *
                                302                 :  * If running in the backend and write_message is false, this function must
 3574 noah                      303 ECB             :  * cope with the possibility that elog() and palloc() are not yet usable.
 5672 tgl                       304                 :  */
 5672 tgl                       305 EUB             : int
 4443 peter_e                   306 GIC      167798 : pg_get_encoding_from_locale(const char *ctype, bool write_message)
 5672 tgl                       307 ECB             : {
                                308                 :     char       *sys;
                                309                 :     int         i;
                                310                 : 
                                311                 :     /* Get the CODESET property, and also LC_CTYPE if not passed in */
 5672 tgl                       312 GIC      167798 :     if (ctype)
                                313                 :     {
                                314                 :         char       *save;
 5667 tgl                       315 ECB             :         char       *name;
 5672                           316                 : 
 4896                           317                 :         /* If locale is C or POSIX, we can allow all encodings */
 4896 tgl                       318 GIC      309011 :         if (pg_strcasecmp(ctype, "C") == 0 ||
                                319          154199 :             pg_strcasecmp(ctype, "POSIX") == 0)
                                320             940 :             return PG_SQL_ASCII;
                                321                 : 
 5672 tgl                       322 CBC      153872 :         save = setlocale(LC_CTYPE, NULL);
                                323          153872 :         if (!save)
 4790 bruce                     324 UIC           0 :             return -1;          /* setlocale() broken? */
                                325                 :         /* must copy result, or it might change after setlocale */
 5672 tgl                       326 GIC      153872 :         save = strdup(save);
                                327          153872 :         if (!save)
 4790 bruce                     328 LBC           0 :             return -1;          /* out of memory; unlikely */
 5672 tgl                       329 ECB             : 
 5667 tgl                       330 GBC      153872 :         name = setlocale(LC_CTYPE, ctype);
 5667 tgl                       331 GIC      153872 :         if (!name)
                                332                 :         {
 5672 tgl                       333 CBC          12 :             free(save);
 4790 bruce                     334              12 :             return -1;          /* bogus ctype passed in? */
 5672 tgl                       335 ECB             :         }
                                336                 : 
                                337                 : #ifndef WIN32
 5672 tgl                       338 CBC      153860 :         sys = nl_langinfo(CODESET);
                                339          153860 :         if (sys)
                                340          153860 :             sys = strdup(sys);
                                341                 : #else
                                342                 :         sys = win32_langinfo(name);
                                343                 : #endif
                                344                 : 
 5672 tgl                       345 GIC      153860 :         setlocale(LC_CTYPE, save);
 5672 tgl                       346 CBC      153860 :         free(save);
 5672 tgl                       347 EUB             :     }
                                348                 :     else
                                349                 :     {
 5672 tgl                       350 ECB             :         /* much easier... */
 5672 tgl                       351 GIC       12986 :         ctype = setlocale(LC_CTYPE, NULL);
 5672 tgl                       352 CBC       12986 :         if (!ctype)
 4790 bruce                     353 UIC           0 :             return -1;          /* setlocale() broken? */
 4896 tgl                       354 ECB             : 
                                355                 :         /* If locale is C or POSIX, we can allow all encodings */
 4896 tgl                       356 GIC       25873 :         if (pg_strcasecmp(ctype, "C") == 0 ||
                                357           12887 :             pg_strcasecmp(ctype, "POSIX") == 0)
                                358              99 :             return PG_SQL_ASCII;
                                359                 : 
                                360                 : #ifndef WIN32
 5672                           361           12887 :         sys = nl_langinfo(CODESET);
                                362           12887 :         if (sys)
                                363           12887 :             sys = strdup(sys);
                                364                 : #else
                                365                 :         sys = win32_langinfo(ctype);
                                366                 : #endif
                                367                 :     }
                                368                 : 
                                369          166747 :     if (!sys)
 4790 bruce                     370 UIC           0 :         return -1;              /* out of memory; unlikely */
                                371                 : 
                                372                 :     /* Check the table */
 5672 tgl                       373 GIC     4925335 :     for (i = 0; encoding_match_list[i].system_enc_name; i++)
                                374                 :     {
                                375         4923517 :         if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
                                376                 :         {
                                377          164929 :             free(sys);
 5672 tgl                       378 CBC      164929 :             return encoding_match_list[i].pg_enc_code;
                                379                 :         }
                                380                 :     }
 5672 tgl                       381 EUB             : 
                                382                 :     /* Special-case kluges for particular platforms go here */
                                383                 : 
                                384                 : #ifdef __darwin__
                                385                 : 
                                386                 :     /*
                                387                 :      * Current macOS has many locales that report an empty string for CODESET,
                                388                 :      * but they all seem to actually use UTF-8.
                                389                 :      */
                                390                 :     if (strlen(sys) == 0)
                                391                 :     {
 5672 tgl                       392 ECB             :         free(sys);
                                393                 :         return PG_UTF8;
                                394                 :     }
                                395                 : #endif
                                396                 : 
                                397                 :     /*
                                398                 :      * We print a warning if we got a CODESET string but couldn't recognize
                                399                 :      * it.  This means we need another entry in the table.
                                400                 :      */
 4443 peter_e                   401 GIC        1818 :     if (write_message)
                                402                 :     {
                                403                 : #ifdef FRONTEND
 4443 peter_e                   404 UIC           0 :         fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
                                405                 :                 ctype, sys);
                                406                 :         /* keep newline separate so there's only one translatable string */
                                407               0 :         fputc('\n', stderr);
                                408                 : #else
                                409               0 :         ereport(WARNING,
                                410                 :                 (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
                                411                 :                         ctype, sys)));
                                412                 : #endif
                                413                 :     }
                                414                 : 
 5672 tgl                       415 GIC        1818 :     free(sys);
 4896                           416            1818 :     return -1;
                                417                 : }
                                418                 : #else                           /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
                                419                 : 
                                420                 : /*
                                421                 :  * stub if no multi-language platform support
                                422                 :  *
                                423                 :  * Note: we could return -1 here, but that would have the effect of
                                424                 :  * forcing users to specify an encoding to initdb on such platforms.
                                425                 :  * It seems better to silently default to SQL_ASCII.
                                426                 :  */
                                427                 : int
                                428                 : pg_get_encoding_from_locale(const char *ctype, bool write_message)
                                429                 : {
                                430                 :     return PG_SQL_ASCII;
                                431                 : }
                                432                 : 
                                433                 : #endif                          /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
        

Generated by: LCOV version v1.16-55-g56c0a2a