LCOV - differential code coverage report
Current view: top level - src/backend/utils/adt - pg_locale.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 70.1 % 795 557 120 27 82 9 14 256 187 100 204 413 11 17
Current Date: 2023-04-08 15:15:32 Functions: 86.8 % 53 46 7 29 17 7 45 1
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-----------------------------------------------------------------------
       2                 :  *
       3                 :  * PostgreSQL locale utilities
       4                 :  *
       5                 :  * Portions Copyright (c) 2002-2023, PostgreSQL Global Development Group
       6                 :  *
       7                 :  * src/backend/utils/adt/pg_locale.c
       8                 :  *
       9                 :  *-----------------------------------------------------------------------
      10                 :  */
      11                 : 
      12                 : /*----------
      13                 :  * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
      14                 :  * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
      15                 :  * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
      16                 :  * toupper(), etc. are always in the same fixed locale.
      17                 :  *
      18                 :  * LC_MESSAGES is settable at run time and will take effect
      19                 :  * immediately.
      20                 :  *
      21                 :  * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
      22                 :  * settable at run-time.  However, we don't actually set those locale
      23                 :  * categories permanently.  This would have bizarre effects like no
      24                 :  * longer accepting standard floating-point literals in some locales.
      25                 :  * Instead, we only set these locale categories briefly when needed,
      26                 :  * cache the required information obtained from localeconv() or
      27                 :  * strftime(), and then set the locale categories back to "C".
      28                 :  * The cached information is only used by the formatting functions
      29                 :  * (to_char, etc.) and the money type.  For the user, this should all be
      30                 :  * transparent.
      31                 :  *
      32                 :  * !!! NOW HEAR THIS !!!
      33                 :  *
      34                 :  * We've been bitten repeatedly by this bug, so let's try to keep it in
      35                 :  * mind in future: on some platforms, the locale functions return pointers
      36                 :  * to static data that will be overwritten by any later locale function.
      37                 :  * Thus, for example, the obvious-looking sequence
      38                 :  *          save = setlocale(category, NULL);
      39                 :  *          if (!setlocale(category, value))
      40                 :  *              fail = true;
      41                 :  *          setlocale(category, save);
      42                 :  * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
      43                 :  * will change the memory save is pointing at.  To do this sort of thing
      44                 :  * safely, you *must* pstrdup what setlocale returns the first time.
      45                 :  *
      46                 :  * The POSIX locale standard is available here:
      47                 :  *
      48                 :  *  http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
      49                 :  *----------
      50                 :  */
      51                 : 
      52                 : 
      53                 : #include "postgres.h"
      54                 : 
      55                 : #include <time.h>
      56                 : 
      57                 : #include "access/htup_details.h"
      58                 : #include "catalog/pg_collation.h"
      59                 : #include "catalog/pg_control.h"
      60                 : #include "mb/pg_wchar.h"
      61                 : #include "miscadmin.h"
      62                 : #include "utils/builtins.h"
      63                 : #include "utils/formatting.h"
      64                 : #include "utils/guc_hooks.h"
      65                 : #include "utils/hsearch.h"
      66                 : #include "utils/lsyscache.h"
      67                 : #include "utils/memutils.h"
      68                 : #include "utils/pg_locale.h"
      69                 : #include "utils/syscache.h"
      70                 : 
      71                 : #ifdef USE_ICU
      72                 : #include <unicode/ucnv.h>
      73                 : #include <unicode/ustring.h>
      74                 : #endif
      75                 : 
      76                 : #ifdef __GLIBC__
      77                 : #include <gnu/libc-version.h>
      78                 : #endif
      79                 : 
      80                 : #ifdef WIN32
      81                 : #include <shlwapi.h>
      82                 : #endif
      83                 : 
      84                 : /*
      85                 :  * This should be large enough that most strings will fit, but small enough
      86                 :  * that we feel comfortable putting it on the stack
      87                 :  */
      88                 : #define     TEXTBUFLEN          1024
      89                 : 
      90                 : #define     MAX_L10N_DATA       80
      91                 : 
      92                 : 
      93                 : /* GUC settings */
      94                 : char       *locale_messages;
      95                 : char       *locale_monetary;
      96                 : char       *locale_numeric;
      97                 : char       *locale_time;
      98                 : 
      99                 : int         icu_validation_level = ERROR;
     100                 : 
     101                 : /*
     102                 :  * lc_time localization cache.
     103                 :  *
     104                 :  * We use only the first 7 or 12 entries of these arrays.  The last array
     105                 :  * element is left as NULL for the convenience of outside code that wants
     106                 :  * to sequentially scan these arrays.
     107                 :  */
     108                 : char       *localized_abbrev_days[7 + 1];
     109                 : char       *localized_full_days[7 + 1];
     110                 : char       *localized_abbrev_months[12 + 1];
     111                 : char       *localized_full_months[12 + 1];
     112                 : 
     113                 : /* is the database's LC_CTYPE the C locale? */
     114                 : bool        database_ctype_is_c = false;
     115                 : 
     116                 : /* indicates whether locale information cache is valid */
     117                 : static bool CurrentLocaleConvValid = false;
     118                 : static bool CurrentLCTimeValid = false;
     119                 : 
     120                 : /* Cache for collation-related knowledge */
     121                 : 
     122                 : typedef struct
     123                 : {
     124                 :     Oid         collid;         /* hash key: pg_collation OID */
     125                 :     bool        collate_is_c;   /* is collation's LC_COLLATE C? */
     126                 :     bool        ctype_is_c;     /* is collation's LC_CTYPE C? */
     127                 :     bool        flags_valid;    /* true if above flags are valid */
     128                 :     pg_locale_t locale;         /* locale_t struct, or 0 if not valid */
     129                 : } collation_cache_entry;
     130                 : 
     131                 : static HTAB *collation_cache = NULL;
     132                 : 
     133                 : 
     134                 : #if defined(WIN32) && defined(LC_MESSAGES)
     135                 : static char *IsoLocaleName(const char *);
     136                 : #endif
     137                 : 
     138                 : #ifdef USE_ICU
     139                 : /*
     140                 :  * Converter object for converting between ICU's UChar strings and C strings
     141                 :  * in database encoding.  Since the database encoding doesn't change, we only
     142                 :  * need one of these per session.
     143                 :  */
     144                 : static UConverter *icu_converter = NULL;
     145                 : 
     146                 : static UCollator *pg_ucol_open(const char *loc_str);
     147                 : static void init_icu_converter(void);
     148                 : static size_t uchar_length(UConverter *converter,
     149                 :                            const char *str, int32_t len);
     150                 : static int32_t uchar_convert(UConverter *converter,
     151                 :                              UChar *dest, int32_t destlen,
     152                 :                              const char *str, int32_t srclen);
     153                 : static void icu_set_collation_attributes(UCollator *collator, const char *loc,
     154                 :                                          UErrorCode *status);
     155                 : #endif
     156                 : 
     157                 : /*
     158                 :  * pg_perm_setlocale
     159                 :  *
     160                 :  * This wraps the libc function setlocale(), with two additions.  First, when
     161                 :  * changing LC_CTYPE, update gettext's encoding for the current message
     162                 :  * domain.  GNU gettext automatically tracks LC_CTYPE on most platforms, but
     163                 :  * not on Windows.  Second, if the operation is successful, the corresponding
     164                 :  * LC_XXX environment variable is set to match.  By setting the environment
     165                 :  * variable, we ensure that any subsequent use of setlocale(..., "") will
     166                 :  * preserve the settings made through this routine.  Of course, LC_ALL must
     167                 :  * also be unset to fully ensure that, but that has to be done elsewhere after
     168                 :  * all the individual LC_XXX variables have been set correctly.  (Thank you
     169                 :  * Perl for making this kluge necessary.)
     170                 :  */
     171                 : char *
     172 GIC       43528 : pg_perm_setlocale(int category, const char *locale)
     173                 : {
     174                 :     char       *result;
     175                 :     const char *envvar;
     176                 : 
     177                 : #ifndef WIN32
     178           43528 :     result = setlocale(category, locale);
     179                 : #else
     180                 : 
     181                 :     /*
     182                 :      * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
     183                 :      * the given value is good and set it in the environment variables. We
     184                 :      * must ignore attempts to set to "", which means "keep using the old
     185                 :      * environment value".
     186                 :      */
     187                 : #ifdef LC_MESSAGES
     188                 :     if (category == LC_MESSAGES)
     189                 :     {
     190                 :         result = (char *) locale;
     191                 :         if (locale == NULL || locale[0] == '\0')
     192                 :             return result;
     193                 :     }
     194                 :     else
     195                 : #endif
     196                 :         result = setlocale(category, locale);
     197                 : #endif                          /* WIN32 */
     198 ECB             : 
     199 GIC       43528 :     if (result == NULL)
     200 UIC           0 :         return result;          /* fall out immediately on failure */
     201                 : 
     202                 :     /*
     203                 :      * Use the right encoding in translated messages.  Under ENABLE_NLS, let
     204 ECB             :      * pg_bind_textdomain_codeset() figure it out.  Under !ENABLE_NLS, message
     205                 :      * format strings are ASCII, but database-encoding strings may enter the
     206                 :      * message via %s.  This makes the overall message encoding equal to the
     207                 :      * database encoding.
     208                 :      */
     209 GIC       43528 :     if (category == LC_CTYPE)
     210                 :     {
     211                 :         static char save_lc_ctype[LOCALE_NAME_BUFLEN];
     212                 : 
     213                 :         /* copy setlocale() return value before callee invokes it again */
     214           12785 :         strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
     215           12785 :         result = save_lc_ctype;
     216                 : 
     217                 : #ifdef ENABLE_NLS
     218           12785 :         SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
     219                 : #else
     220                 :         SetMessageEncoding(GetDatabaseEncoding());
     221                 : #endif
     222                 :     }
     223                 : 
     224           43528 :     switch (category)
     225 ECB             :     {
     226 GBC       12785 :         case LC_COLLATE:
     227 GIC       12785 :             envvar = "LC_COLLATE";
     228           12785 :             break;
     229           12785 :         case LC_CTYPE:
     230           12785 :             envvar = "LC_CTYPE";
     231           12785 :             break;
     232                 : #ifdef LC_MESSAGES
     233           10101 :         case LC_MESSAGES:
     234           10101 :             envvar = "LC_MESSAGES";
     235 ECB             : #ifdef WIN32
     236                 :             result = IsoLocaleName(locale);
     237                 :             if (result == NULL)
     238                 :                 result = (char *) locale;
     239                 :             elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
     240                 : #endif                          /* WIN32 */
     241 CBC       10101 :             break;
     242                 : #endif                          /* LC_MESSAGES */
     243 GIC        2619 :         case LC_MONETARY:
     244 CBC        2619 :             envvar = "LC_MONETARY";
     245 GIC        2619 :             break;
     246            2619 :         case LC_NUMERIC:
     247            2619 :             envvar = "LC_NUMERIC";
     248            2619 :             break;
     249            2619 :         case LC_TIME:
     250 CBC        2619 :             envvar = "LC_TIME";
     251 GIC        2619 :             break;
     252 LBC           0 :         default:
     253               0 :             elog(FATAL, "unrecognized LC category: %d", category);
     254 ECB             :             return NULL;        /* keep compiler quiet */
     255                 :     }
     256                 : 
     257 CBC       43528 :     if (setenv(envvar, result, 1) != 0)
     258 UIC           0 :         return NULL;
     259 ECB             : 
     260 CBC       43528 :     return result;
     261                 : }
     262                 : 
     263                 : 
     264                 : /*
     265                 :  * Is the locale name valid for the locale category?
     266                 :  *
     267 ECB             :  * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
     268                 :  * canonical name is stored there.  This is especially useful for figuring out
     269                 :  * what locale name "" means (ie, the server environment value).  (Actually,
     270                 :  * it seems that on most implementations that's the only thing it's good for;
     271                 :  * we could wish that setlocale gave back a canonically spelled version of
     272                 :  * the locale name, but typically it doesn't.)
     273                 :  */
     274                 : bool
     275 CBC       33149 : check_locale(int category, const char *locale, char **canonname)
     276 ECB             : {
     277                 :     char       *save;
     278 EUB             :     char       *res;
     279                 : 
     280 GIC       33149 :     if (canonname)
     281            1607 :         *canonname = NULL;      /* in case of failure */
     282                 : 
     283 CBC       33149 :     save = setlocale(category, NULL);
     284 GBC       33149 :     if (!save)
     285 UIC           0 :         return false;           /* won't happen, we hope */
     286 ECB             : 
     287                 :     /* save may be pointing at a modifiable scratch variable, see above. */
     288 GIC       33149 :     save = pstrdup(save);
     289                 : 
     290                 :     /* set the locale with setlocale, to see if it accepts it. */
     291           33149 :     res = setlocale(category, locale);
     292                 : 
     293                 :     /* save canonical name if requested. */
     294           33149 :     if (res && canonname)
     295            1605 :         *canonname = pstrdup(res);
     296                 : 
     297                 :     /* restore old value. */
     298           33149 :     if (!setlocale(category, save))
     299 UIC           0 :         elog(WARNING, "failed to restore old locale \"%s\"", save);
     300 GIC       33149 :     pfree(save);
     301 ECB             : 
     302 GIC       33149 :     return (res != NULL);
     303                 : }
     304                 : 
     305                 : 
     306 ECB             : /*
     307                 :  * GUC check/assign hooks
     308                 :  *
     309                 :  * For most locale categories, the assign hook doesn't actually set the locale
     310                 :  * permanently, just reset flags so that the next use will cache the
     311 EUB             :  * appropriate values.  (See explanation at the top of this file.)
     312                 :  *
     313                 :  * Note: we accept value = "" as selecting the postmaster's environment
     314 ECB             :  * value, whatever it was (so long as the environment setting is legal).
     315                 :  * This will have been locked down by an earlier call to pg_perm_setlocale.
     316                 :  */
     317                 : bool
     318 GIC        9039 : check_locale_monetary(char **newval, void **extra, GucSource source)
     319                 : {
     320 CBC        9039 :     return check_locale(LC_MONETARY, *newval, NULL);
     321 ECB             : }
     322                 : 
     323                 : void
     324 CBC        8949 : assign_locale_monetary(const char *newval, void *extra)
     325 EUB             : {
     326 CBC        8949 :     CurrentLocaleConvValid = false;
     327 GIC        8949 : }
     328 ECB             : 
     329                 : bool
     330 GIC        9042 : check_locale_numeric(char **newval, void **extra, GucSource source)
     331                 : {
     332            9042 :     return check_locale(LC_NUMERIC, *newval, NULL);
     333                 : }
     334                 : 
     335                 : void
     336            8955 : assign_locale_numeric(const char *newval, void *extra)
     337                 : {
     338            8955 :     CurrentLocaleConvValid = false;
     339            8955 : }
     340                 : 
     341                 : bool
     342            9042 : check_locale_time(char **newval, void **extra, GucSource source)
     343                 : {
     344 CBC        9042 :     return check_locale(LC_TIME, *newval, NULL);
     345                 : }
     346 ECB             : 
     347                 : void
     348 GIC        8952 : assign_locale_time(const char *newval, void *extra)
     349                 : {
     350 CBC        8952 :     CurrentLCTimeValid = false;
     351 GIC        8952 : }
     352 ECB             : 
     353                 : /*
     354                 :  * We allow LC_MESSAGES to actually be set globally.
     355                 :  *
     356                 :  * Note: we normally disallow value = "" because it wouldn't have consistent
     357                 :  * semantics (it'd effectively just use the previous value).  However, this
     358                 :  * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
     359                 :  * not even if the attempted setting fails due to invalid environment value.
     360                 :  * The idea there is just to accept the environment setting *if possible*
     361                 :  * during startup, until we can read the proper value from postgresql.conf.
     362                 :  */
     363                 : bool
     364 CBC        7569 : check_locale_messages(char **newval, void **extra, GucSource source)
     365 ECB             : {
     366 GIC        7569 :     if (**newval == '\0')
     367                 :     {
     368 CBC        3150 :         if (source == PGC_S_DEFAULT)
     369 GIC        3150 :             return true;
     370 ECB             :         else
     371 UIC           0 :             return false;
     372                 :     }
     373                 : 
     374 ECB             :     /*
     375                 :      * LC_MESSAGES category does not exist everywhere, but accept it anyway
     376                 :      *
     377                 :      * On Windows, we can't even check the value, so accept blindly
     378                 :      */
     379                 : #if defined(LC_MESSAGES) && !defined(WIN32)
     380 GIC        4419 :     return check_locale(LC_MESSAGES, *newval, NULL);
     381                 : #else
     382                 :     return true;
     383                 : #endif
     384                 : }
     385                 : 
     386                 : void
     387            7482 : assign_locale_messages(const char *newval, void *extra)
     388                 : {
     389                 :     /*
     390 ECB             :      * LC_MESSAGES category does not exist everywhere, but accept it anyway.
     391                 :      * We ignore failure, as per comment above.
     392                 :      */
     393                 : #ifdef LC_MESSAGES
     394 CBC        7482 :     (void) pg_perm_setlocale(LC_MESSAGES, newval);
     395 ECB             : #endif
     396 GIC        7482 : }
     397 EUB             : 
     398                 : 
     399                 : /*
     400                 :  * Frees the malloced content of a struct lconv.  (But not the struct
     401                 :  * itself.)  It's important that this not throw elog(ERROR).
     402                 :  */
     403                 : static void
     404 GIC           3 : free_struct_lconv(struct lconv *s)
     405                 : {
     406 GNC           3 :     free(s->decimal_point);
     407               3 :     free(s->thousands_sep);
     408               3 :     free(s->grouping);
     409               3 :     free(s->int_curr_symbol);
     410               3 :     free(s->currency_symbol);
     411               3 :     free(s->mon_decimal_point);
     412               3 :     free(s->mon_thousands_sep);
     413               3 :     free(s->mon_grouping);
     414               3 :     free(s->positive_sign);
     415               3 :     free(s->negative_sign);
     416 GIC           3 : }
     417                 : 
     418                 : /*
     419                 :  * Check that all fields of a struct lconv (or at least, the ones we care
     420 ECB             :  * about) are non-NULL.  The field list must match free_struct_lconv().
     421                 :  */
     422                 : static bool
     423 CBC          51 : struct_lconv_is_valid(struct lconv *s)
     424 ECB             : {
     425 CBC          51 :     if (s->decimal_point == NULL)
     426 LBC           0 :         return false;
     427 CBC          51 :     if (s->thousands_sep == NULL)
     428 LBC           0 :         return false;
     429 CBC          51 :     if (s->grouping == NULL)
     430 LBC           0 :         return false;
     431 CBC          51 :     if (s->int_curr_symbol == NULL)
     432 LBC           0 :         return false;
     433 GIC          51 :     if (s->currency_symbol == NULL)
     434 UIC           0 :         return false;
     435 GIC          51 :     if (s->mon_decimal_point == NULL)
     436 UIC           0 :         return false;
     437 GIC          51 :     if (s->mon_thousands_sep == NULL)
     438 UIC           0 :         return false;
     439 CBC          51 :     if (s->mon_grouping == NULL)
     440 UIC           0 :         return false;
     441 CBC          51 :     if (s->positive_sign == NULL)
     442 UBC           0 :         return false;
     443 CBC          51 :     if (s->negative_sign == NULL)
     444 UBC           0 :         return false;
     445 CBC          51 :     return true;
     446 EUB             : }
     447 ECB             : 
     448 EUB             : 
     449 ECB             : /*
     450 EUB             :  * Convert the strdup'd string at *str from the specified encoding to the
     451 ECB             :  * database encoding.
     452 EUB             :  */
     453 ECB             : static void
     454 GBC         408 : db_encoding_convert(int encoding, char **str)
     455 ECB             : {
     456 EUB             :     char       *pstr;
     457 ECB             :     char       *mstr;
     458 EUB             : 
     459 ECB             :     /* convert the string to the database encoding */
     460 GBC         408 :     pstr = pg_any_to_server(*str, strlen(*str), encoding);
     461 CBC         408 :     if (pstr == *str)
     462 GIC         408 :         return;                 /* no conversion happened */
     463                 : 
     464                 :     /* need it malloc'd not palloc'd */
     465 UIC           0 :     mstr = strdup(pstr);
     466               0 :     if (mstr == NULL)
     467               0 :         ereport(ERROR,
     468                 :                 (errcode(ERRCODE_OUT_OF_MEMORY),
     469                 :                  errmsg("out of memory")));
     470 ECB             : 
     471                 :     /* replace old string */
     472 UIC           0 :     free(*str);
     473               0 :     *str = mstr;
     474                 : 
     475               0 :     pfree(pstr);
     476 ECB             : }
     477                 : 
     478                 : 
     479                 : /*
     480                 :  * Return the POSIX lconv struct (contains number/money formatting
     481 EUB             :  * information) with locale information for all categories.
     482                 :  */
     483                 : struct lconv *
     484 GIC        7968 : PGLC_localeconv(void)
     485                 : {
     486                 :     static struct lconv CurrentLocaleConv;
     487                 :     static bool CurrentLocaleConvAllocated = false;
     488 EUB             :     struct lconv *extlconv;
     489                 :     struct lconv worklconv;
     490                 :     char       *save_lc_monetary;
     491                 :     char       *save_lc_numeric;
     492                 : #ifdef WIN32
     493                 :     char       *save_lc_ctype;
     494                 : #endif
     495                 : 
     496                 :     /* Did we do it already? */
     497 GIC        7968 :     if (CurrentLocaleConvValid)
     498            7917 :         return &CurrentLocaleConv;
     499                 : 
     500 ECB             :     /* Free any already-allocated storage */
     501 GIC          51 :     if (CurrentLocaleConvAllocated)
     502                 :     {
     503               3 :         free_struct_lconv(&CurrentLocaleConv);
     504               3 :         CurrentLocaleConvAllocated = false;
     505                 :     }
     506                 : 
     507                 :     /*
     508                 :      * This is tricky because we really don't want to risk throwing error
     509                 :      * while the locale is set to other than our usual settings.  Therefore,
     510                 :      * the process is: collect the usual settings, set locale to special
     511                 :      * setting, copy relevant data into worklconv using strdup(), restore
     512                 :      * normal settings, convert data to desired encoding, and finally stash
     513 ECB             :      * the collected data in CurrentLocaleConv.  This makes it safe if we
     514                 :      * throw an error during encoding conversion or run out of memory anywhere
     515                 :      * in the process.  All data pointed to by struct lconv members is
     516                 :      * allocated with strdup, to avoid premature elog(ERROR) and to allow
     517                 :      * using a single cleanup routine.
     518                 :      */
     519 CBC          51 :     memset(&worklconv, 0, sizeof(worklconv));
     520 ECB             : 
     521                 :     /* Save prevailing values of monetary and numeric locales */
     522 GIC          51 :     save_lc_monetary = setlocale(LC_MONETARY, NULL);
     523              51 :     if (!save_lc_monetary)
     524 UIC           0 :         elog(ERROR, "setlocale(NULL) failed");
     525 GIC          51 :     save_lc_monetary = pstrdup(save_lc_monetary);
     526                 : 
     527              51 :     save_lc_numeric = setlocale(LC_NUMERIC, NULL);
     528              51 :     if (!save_lc_numeric)
     529 UIC           0 :         elog(ERROR, "setlocale(NULL) failed");
     530 GIC          51 :     save_lc_numeric = pstrdup(save_lc_numeric);
     531                 : 
     532                 : #ifdef WIN32
     533                 : 
     534                 :     /*
     535 ECB             :      * The POSIX standard explicitly says that it is undefined what happens if
     536                 :      * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
     537                 :      * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
     538                 :      * believe that localeconv() should return strings that are encoded in the
     539                 :      * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
     540 EUB             :      * once we have successfully collected the localeconv() results, we will
     541 ECB             :      * convert them from that codeset to the desired server encoding.
     542                 :      *
     543                 :      * Windows, of course, resolutely does things its own way; on that
     544                 :      * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
     545 EUB             :      * results.  Hence, we must temporarily set that category as well.
     546 ECB             :      */
     547                 : 
     548                 :     /* Save prevailing value of ctype locale */
     549                 :     save_lc_ctype = setlocale(LC_CTYPE, NULL);
     550                 :     if (!save_lc_ctype)
     551                 :         elog(ERROR, "setlocale(NULL) failed");
     552                 :     save_lc_ctype = pstrdup(save_lc_ctype);
     553                 : 
     554                 :     /* Here begins the critical section where we must not throw error */
     555                 : 
     556                 :     /* use numeric to set the ctype */
     557                 :     setlocale(LC_CTYPE, locale_numeric);
     558                 : #endif
     559                 : 
     560                 :     /* Get formatting information for numeric */
     561 GIC          51 :     setlocale(LC_NUMERIC, locale_numeric);
     562              51 :     extlconv = localeconv();
     563                 : 
     564                 :     /* Must copy data now in case setlocale() overwrites it */
     565              51 :     worklconv.decimal_point = strdup(extlconv->decimal_point);
     566              51 :     worklconv.thousands_sep = strdup(extlconv->thousands_sep);
     567              51 :     worklconv.grouping = strdup(extlconv->grouping);
     568                 : 
     569                 : #ifdef WIN32
     570                 :     /* use monetary to set the ctype */
     571                 :     setlocale(LC_CTYPE, locale_monetary);
     572                 : #endif
     573                 : 
     574                 :     /* Get formatting information for monetary */
     575              51 :     setlocale(LC_MONETARY, locale_monetary);
     576              51 :     extlconv = localeconv();
     577 ECB             : 
     578                 :     /* Must copy data now in case setlocale() overwrites it */
     579 GIC          51 :     worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
     580              51 :     worklconv.currency_symbol = strdup(extlconv->currency_symbol);
     581 CBC          51 :     worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
     582              51 :     worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
     583              51 :     worklconv.mon_grouping = strdup(extlconv->mon_grouping);
     584 GIC          51 :     worklconv.positive_sign = strdup(extlconv->positive_sign);
     585              51 :     worklconv.negative_sign = strdup(extlconv->negative_sign);
     586                 :     /* Copy scalar fields as well */
     587              51 :     worklconv.int_frac_digits = extlconv->int_frac_digits;
     588              51 :     worklconv.frac_digits = extlconv->frac_digits;
     589              51 :     worklconv.p_cs_precedes = extlconv->p_cs_precedes;
     590              51 :     worklconv.p_sep_by_space = extlconv->p_sep_by_space;
     591 CBC          51 :     worklconv.n_cs_precedes = extlconv->n_cs_precedes;
     592              51 :     worklconv.n_sep_by_space = extlconv->n_sep_by_space;
     593 GIC          51 :     worklconv.p_sign_posn = extlconv->p_sign_posn;
     594              51 :     worklconv.n_sign_posn = extlconv->n_sign_posn;
     595 ECB             : 
     596                 :     /*
     597                 :      * Restore the prevailing locale settings; failure to do so is fatal.
     598                 :      * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
     599                 :      * but proceeding with the wrong value of LC_CTYPE would certainly be bad
     600                 :      * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
     601                 :      * are almost certainly "C", there's really no reason that restoring those
     602                 :      * should fail.
     603                 :      */
     604                 : #ifdef WIN32
     605                 :     if (!setlocale(LC_CTYPE, save_lc_ctype))
     606                 :         elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
     607                 : #endif
     608 CBC          51 :     if (!setlocale(LC_MONETARY, save_lc_monetary))
     609 LBC           0 :         elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
     610 CBC          51 :     if (!setlocale(LC_NUMERIC, save_lc_numeric))
     611 UIC           0 :         elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
     612                 : 
     613                 :     /*
     614                 :      * At this point we've done our best to clean up, and can call functions
     615                 :      * that might possibly throw errors with a clean conscience.  But let's
     616                 :      * make sure we don't leak any already-strdup'd fields in worklconv.
     617                 :      */
     618 GIC          51 :     PG_TRY();
     619                 :     {
     620                 :         int         encoding;
     621                 : 
     622                 :         /* Release the pstrdup'd locale names */
     623              51 :         pfree(save_lc_monetary);
     624 CBC          51 :         pfree(save_lc_numeric);
     625 EUB             : #ifdef WIN32
     626 ECB             :         pfree(save_lc_ctype);
     627 EUB             : #endif
     628                 : 
     629                 :         /* If any of the preceding strdup calls failed, complain now. */
     630 GIC          51 :         if (!struct_lconv_is_valid(&worklconv))
     631 UIC           0 :             ereport(ERROR,
     632                 :                     (errcode(ERRCODE_OUT_OF_MEMORY),
     633                 :                      errmsg("out of memory")));
     634 ECB             : 
     635                 :         /*
     636                 :          * Now we must perform encoding conversion from whatever's associated
     637                 :          * with the locales into the database encoding.  If we can't identify
     638                 :          * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
     639                 :          * use PG_SQL_ASCII, which will result in just validating that the
     640                 :          * strings are OK in the database encoding.
     641                 :          */
     642 GIC          51 :         encoding = pg_get_encoding_from_locale(locale_numeric, true);
     643              51 :         if (encoding < 0)
     644 UIC           0 :             encoding = PG_SQL_ASCII;
     645                 : 
     646 CBC          51 :         db_encoding_convert(encoding, &worklconv.decimal_point);
     647 GBC          51 :         db_encoding_convert(encoding, &worklconv.thousands_sep);
     648                 :         /* grouping is not text and does not require conversion */
     649                 : 
     650 GIC          51 :         encoding = pg_get_encoding_from_locale(locale_monetary, true);
     651              51 :         if (encoding < 0)
     652 UIC           0 :             encoding = PG_SQL_ASCII;
     653                 : 
     654 GIC          51 :         db_encoding_convert(encoding, &worklconv.int_curr_symbol);
     655              51 :         db_encoding_convert(encoding, &worklconv.currency_symbol);
     656              51 :         db_encoding_convert(encoding, &worklconv.mon_decimal_point);
     657              51 :         db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
     658 ECB             :         /* mon_grouping is not text and does not require conversion */
     659 CBC          51 :         db_encoding_convert(encoding, &worklconv.positive_sign);
     660 GBC          51 :         db_encoding_convert(encoding, &worklconv.negative_sign);
     661                 :     }
     662 LBC           0 :     PG_CATCH();
     663 ECB             :     {
     664 UIC           0 :         free_struct_lconv(&worklconv);
     665               0 :         PG_RE_THROW();
     666 ECB             :     }
     667 CBC          51 :     PG_END_TRY();
     668 EUB             : 
     669                 :     /*
     670 ECB             :      * Everything is good, so save the results.
     671                 :      */
     672 CBC          51 :     CurrentLocaleConv = worklconv;
     673              51 :     CurrentLocaleConvAllocated = true;
     674 GIC          51 :     CurrentLocaleConvValid = true;
     675 CBC          51 :     return &CurrentLocaleConv;
     676 ECB             : }
     677                 : 
     678 EUB             : #ifdef WIN32
     679                 : /*
     680                 :  * On Windows, strftime() returns its output in encoding CP_ACP (the default
     681                 :  * operating system codepage for the computer), which is likely different
     682                 :  * from SERVER_ENCODING.  This is especially important in Japanese versions
     683 ECB             :  * of Windows which will use SJIS encoding, which we don't support as a
     684                 :  * server encoding.
     685                 :  *
     686                 :  * So, instead of using strftime(), use wcsftime() to return the value in
     687                 :  * wide characters (internally UTF16) and then convert to UTF8, which we
     688                 :  * know how to handle directly.
     689                 :  *
     690                 :  * Note that this only affects the calls to strftime() in this file, which are
     691                 :  * used to get the locale-aware strings. Other parts of the backend use
     692                 :  * pg_strftime(), which isn't locale-aware and does not need to be replaced.
                         :  *
                         :  * dst and dstlen describe the output buffer and its size in bytes; format
                         :  * and tm are handed to wcsftime() after format has been converted to wide
                         :  * characters.  The result is the number of bytes stored in dst, not counting
                         :  * the terminating NUL that is always appended (the conversion is limited to
                         :  * dstlen - 1 bytes to leave room for it), or 0 if wcsftime() returned 0.
                         :  * A failure of either character-set conversion is reported with elog(ERROR).
     693                 :  */
     694                 : static size_t
     695                 : strftime_win32(char *dst, size_t dstlen,
     696                 :                const char *format, const struct tm *tm)
     697                 : {
     698                 :     size_t      len;
     699                 :     wchar_t     wformat[8];     /* formats used below need 3 chars */
     700                 :     wchar_t     wbuf[MAX_L10N_DATA];
     701                 : 
     702                 :     /*
     703                 :      * Get a wchar_t version of the format string.  We only actually use
     704                 :      * plain-ASCII formats in this file, so we can say that they're UTF8.
     705                 :      */
     706                 :     len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
     707                 :                               wformat, lengthof(wformat));
     708                 :     if (len == 0)
     709                 :         elog(ERROR, "could not convert format string from UTF-8: error code %lu",
     710                 :              GetLastError());
     711                 : 
     712                 :     len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
     713                 :     if (len == 0)
     714                 :     {
     715                 :         /*
     716                 :          * wcsftime failed, possibly because the result would not fit in
     717                 :          * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
     718                 :          */
     719                 :         return 0;
     720                 :     }
     721                 : 
     722                 :     len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
     723                 :                               NULL, NULL);
     724                 :     if (len == 0)
     725                 :         elog(ERROR, "could not convert string to UTF-8: error code %lu",
     726                 :              GetLastError());
     727                 : 
     728                 :     dst[len] = '\0';
     729                 : 
     730                 :     return len;
     731                 : }
     732                 : 
     733                 : /* redefine strftime() so that later calls in this file use strftime_win32() */
     734                 : #define strftime(a,b,c,d) strftime_win32(a,b,c,d)
     735                 : #endif                          /* WIN32 */
     736                 : 
     737                 : /*
     738                 :  * Subroutine for cache_locale_time().
     739                 :  * Convert the given string from encoding "encoding" to the database
     740                 :  * encoding, and store the result at *dst, replacing any previous value.
                         :  *
                         :  * The stored copy is allocated in TopMemoryContext.  A non-NULL previous
                         :  * value of *dst is pfree'd, but only after the new copy has been made, and
                         :  * a conversion result that is a different pointer from src is pfree'd too.
     741                 :  */
     742                 : static void
     743 GIC         950 : cache_single_string(char **dst, const char *src, int encoding)
     744                 : {
     745                 :     char       *ptr;
     746                 :     char       *olddst;
     747                 : 
     748                 :     /* Convert the string to the database encoding, or validate it's OK */
     749             950 :     ptr = pg_any_to_server(src, strlen(src), encoding);
     750                 : 
     751                 :     /* Store the string in long-lived storage, replacing any previous value */
     752             950 :     olddst = *dst;
     753             950 :     *dst = MemoryContextStrdup(TopMemoryContext, ptr);
     754             950 :     if (olddst)
     755 UIC           0 :         pfree(olddst);
     756                 : 
     757                 :     /* Might as well clean up any palloc'd conversion result, too */
     758 GIC         950 :     if (ptr != src)
     759 CBC         114 :         pfree(ptr);
     760 GIC         950 : }
     761                 : 
     762                 : /*
     763                 :  * Update the lc_time localization cache variables if needed.
                         :  *
                         :  * Returns immediately when CurrentLCTimeValid is already set.  Otherwise
                         :  * LC_TIME (and, on Windows, LC_CTYPE) is temporarily set to locale_time
                         :  * while strftime() produces the abbreviated and full names of the 7 days
                         :  * and 12 months into buf[], one MAX_L10N_DATA-sized slot per name.  The
                         :  * previous locale settings are then restored; failing to restore them is
                         :  * reported at FATAL level.  Only after that are the names converted to the
                         :  * database encoding and stored by cache_single_string() into the
                         :  * localized_* arrays, each of which gets a terminating NULL entry, and
                         :  * CurrentLCTimeValid is set.
     764                 :  */
     765 ECB             : void
     766 GIC        9277 : cache_locale_time(void)
     767                 : {
     768 ECB             :     char        buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
     769                 :     char       *bufptr;
     770                 :     time_t      timenow;
     771 EUB             :     struct tm  *timeinfo;
     772 GIC        9277 :     bool        strftimefail = false;
     773                 :     int         encoding;
     774 ECB             :     int         i;
     775                 :     char       *save_lc_time;
     776                 : #ifdef WIN32
     777                 :     char       *save_lc_ctype;
     778                 : #endif
     779                 : 
     780                 :     /* did we do this already? */
     781 GIC        9277 :     if (CurrentLCTimeValid)
     782 CBC        9252 :         return;
     783                 : 
     784 GIC          25 :     elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
     785                 : 
     786                 :     /*
     787                 :      * As in PGLC_localeconv(), it's critical that we not throw error while
     788 ECB             :      * libc's locale settings have nondefault values.  Hence, we just call
     789                 :      * strftime() within the critical section, and then convert and save its
     790                 :      * results afterwards.
     791                 :      */
     792                 : 
     793                 :     /* Save prevailing value of time locale */
     794 GIC          25 :     save_lc_time = setlocale(LC_TIME, NULL);
     795              25 :     if (!save_lc_time)
     796 UIC           0 :         elog(ERROR, "setlocale(NULL) failed");
     797 CBC          25 :     save_lc_time = pstrdup(save_lc_time);
     798 ECB             : 
     799                 : #ifdef WIN32
     800                 : 
     801                 :     /*
     802                 :      * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
     803                 :      * must set it here.  This code looks the same as what PGLC_localeconv()
     804                 :      * does, but the underlying reason is different: this does NOT determine
     805                 :      * the encoding we'll get back from strftime_win32().
     806                 :      */
     807                 : 
     808                 :     /* Save prevailing value of ctype locale */
     809                 :     save_lc_ctype = setlocale(LC_CTYPE, NULL);
     810                 :     if (!save_lc_ctype)
     811                 :         elog(ERROR, "setlocale(NULL) failed");
     812 EUB             :     save_lc_ctype = pstrdup(save_lc_ctype);
     813 ECB             : 
     814                 :     /* use lc_time to set the ctype */
     815                 :     setlocale(LC_CTYPE, locale_time);
     816                 : #endif
     817                 : 
     818 GIC          25 :     setlocale(LC_TIME, locale_time);
     819                 : 
     820                 :     /* We use times close to current time as data for strftime(). */
     821              25 :     timenow = time(NULL);
     822              25 :     timeinfo = localtime(&timenow);
     823                 : 
     824                 :     /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
     825              25 :     bufptr = buf;
     826                 : 
     827                 :     /*
     828                 :      * MAX_L10N_DATA is sufficient buffer space for every known locale, and
     829                 :      * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
     830                 :      * error.)  An implementation might report errors (e.g. ENOMEM) by
     831                 :      * returning 0 (or, less plausibly, a negative value) and setting errno.
     832                 :      * Report errno just in case the implementation did that, but clear it in
     833                 :      * advance of the calls so we don't emit a stale, unrelated errno.
     834 ECB             :      */
     835 GIC          25 :     errno = 0;
     836                 : 
     837 ECB             :     /* localized days */
     838 CBC         200 :     for (i = 0; i < 7; i++)
     839                 :     {
     840 GIC         175 :         timeinfo->tm_wday = i;
     841 CBC         175 :         if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
     842 UIC           0 :             strftimefail = true;
     843 GIC         175 :         bufptr += MAX_L10N_DATA;
     844             175 :         if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
     845 UIC           0 :             strftimefail = true;
     846 GIC         175 :         bufptr += MAX_L10N_DATA;
     847                 :     }
     848                 : 
     849                 :     /* localized months */
     850             325 :     for (i = 0; i < 12; i++)
     851 ECB             :     {
     852 GIC         300 :         timeinfo->tm_mon = i;
     853             300 :         timeinfo->tm_mday = 1;   /* make sure we don't have invalid date */
     854 CBC         300 :         if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
     855 UIC           0 :             strftimefail = true;
     856 CBC         300 :         bufptr += MAX_L10N_DATA;
     857             300 :         if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
     858 UBC           0 :             strftimefail = true;
     859 CBC         300 :         bufptr += MAX_L10N_DATA;
     860 ECB             :     }
     861 EUB             : 
     862 ECB             :     /*
     863                 :      * Restore the prevailing locale settings; as in PGLC_localeconv(),
     864                 :      * failure to do so is fatal.
     865                 :      */
     866                 : #ifdef WIN32
     867                 :     if (!setlocale(LC_CTYPE, save_lc_ctype))
     868                 :         elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
     869                 : #endif
     870 CBC          25 :     if (!setlocale(LC_TIME, save_lc_time))
     871 UBC           0 :         elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
     872 ECB             : 
     873                 :     /*
     874 EUB             :      * At this point we've done our best to clean up, and can throw errors, or
     875 ECB             :      * call functions that might throw errors, with a clean conscience.
     876                 :      */
     877 GIC          25 :     if (strftimefail)
     878 UIC           0 :         elog(ERROR, "strftime() failed: %m");
     879                 : 
     880                 :     /* Release the pstrdup'd locale names */
     881 GIC          25 :     pfree(save_lc_time);
     882                 : #ifdef WIN32
     883                 :     pfree(save_lc_ctype);
     884                 : #endif
     885                 : 
     886 ECB             : #ifndef WIN32
     887 EUB             : 
     888                 :     /*
     889                 :      * As in PGLC_localeconv(), we must convert strftime()'s output from the
     890                 :      * encoding implied by LC_TIME to the database encoding.  If we can't
     891                 :      * identify the LC_TIME encoding, just perform encoding validation.
     892                 :      */
     893 CBC          25 :     encoding = pg_get_encoding_from_locale(locale_time, true);
     894 GBC          25 :     if (encoding < 0)
     895 UIC           0 :         encoding = PG_SQL_ASCII;
     896                 : 
     897 ECB             : #else
     898                 : 
     899                 :     /*
     900                 :      * On Windows, strftime_win32() always returns UTF8 data, so convert from
     901                 :      * that if necessary.
     902                 :      */
     903                 :     encoding = PG_UTF8;
     904                 : 
     905                 : #endif                          /* WIN32 */
     906                 : 
     907 GIC          25 :     bufptr = buf;
     908                 : 
     909 ECB             :     /* localized days */
     910 CBC         200 :     for (i = 0; i < 7; i++)
     911 EUB             :     {
     912 GIC         175 :         cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
     913             175 :         bufptr += MAX_L10N_DATA;
     914             175 :         cache_single_string(&localized_full_days[i], bufptr, encoding);
     915             175 :         bufptr += MAX_L10N_DATA;
     916                 :     }
     917              25 :     localized_abbrev_days[7] = NULL;
     918              25 :     localized_full_days[7] = NULL;
     919                 : 
     920                 :     /* localized months */
     921             325 :     for (i = 0; i < 12; i++)
     922                 :     {
     923 CBC         300 :         cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
     924 GIC         300 :         bufptr += MAX_L10N_DATA;
     925             300 :         cache_single_string(&localized_full_months[i], bufptr, encoding);
     926 CBC         300 :         bufptr += MAX_L10N_DATA;
     927                 :     }
     928              25 :     localized_abbrev_months[12] = NULL;
     929              25 :     localized_full_months[12] = NULL;
     930 ECB             : 
     931 CBC          25 :     CurrentLCTimeValid = true;
     932                 : }
     933 ECB             : 
     934                 : 
     935                 : #if defined(WIN32) && defined(LC_MESSAGES)
     936                 : /*
     937                 :  * Convert a Windows setlocale() argument to a Unix-style one.
     938                 :  *
     939                 :  * Regardless of platform, we install message catalogs under a Unix-style
     940                 :  * LL[_CC][.ENCODING][@VARIANT] naming convention.  Only LC_MESSAGES settings
     941                 :  * following that style will elicit localized interface strings.
     942                 :  *
     943                 :  * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
     944                 :  * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
     945                 :  * case-insensitive.  setlocale() returns the fully-qualified form; for
     946                 :  * example, setlocale("thaI") returns "Thai_Thailand.874".  Internally,
     947                 :  * setlocale() and _create_locale() select a "locale identifier"[1] and store
     948                 :  * it in an undocumented _locale_t field.  From that LCID, we can retrieve the
     949                 :  * ISO 639 language and the ISO 3166 country.  Character encoding does not
     950                 :  * matter, because the server and client encodings govern that.
     951                 :  *
     952                 :  * Windows Vista introduced the "locale name" concept[2], closely following
     953                 :  * RFC 4646.  Locale identifiers are now deprecated.  Starting with Visual
     954                 :  * Studio 2012, setlocale() accepts locale names in addition to the strings it
     955                 :  * accepted historically.  It does not standardize them; setlocale("Th-tH")
     956                 :  * returns "Th-tH".  setlocale(category, "") still returns a traditional
     957                 :  * string.  Furthermore, msvcr110.dll changed the undocumented _locale_t
     958                 :  * content to carry locale names instead of locale identifiers.
     959                 :  *
     960                 :  * Visual Studio 2015 should still be able to do the same as Visual Studio
     961                 :  * 2012, but the declaration of locale_name is missing from _locale_t, which
     962                 :  * would make this code fail to compile.  Hence we fall back instead to
     963                 :  * enumerating all system locales using EnumSystemLocalesEx() to find the
     964                 :  * required locale name.  If the input argument is in Unix style, we can
     965                 :  * get the ISO locale name directly by using GetLocaleInfoEx() with LCType
     966                 :  * LOCALE_SNAME.
     967                 :  *
     968                 :  * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
     969                 :  * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
     970                 :  * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
     971                 :  * localized messages. In particular, every lc_messages setting that initdb
     972                 :  * can select automatically will yield only C-locale messages. XXX This could
     973                 :  * be fixed by running the fully-qualified locale name through a lookup table.
     974                 :  *
     975                 :  * This function returns a pointer to a static buffer bearing the converted
     976                 :  * name or NULL if conversion fails.
     977                 :  *
     978                 :  * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
     979                 :  * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
     980                 :  */
     981                 : 
     982                 : #if defined(_MSC_VER)
     983                 : 
     984                 : /*
     985                 :  * Callback function for EnumSystemLocalesEx() in get_iso_localename().
     986                 :  *
     987                 :  * This function enumerates all system locales, searching for one that matches
     988                 :  * an input with the format: <Language>[_<Country>], e.g.
     989                 :  * English[_United States]
     990                 :  *
     991                 :  * The LPARAM is an array of three wchar_t pointers. The first element is the
     992                 :  * locale_name we want to match, the second element is an allocated buffer
     993                 :  * where the Unix-style locale is copied if a match is found, and the third
     994                 :  * element points to the search status, 1 if a match was found, 0 otherwise.
                         :  * The status is reset to 0 at the start of every invocation.
                         :  *
                         :  * pStr is the name of the locale currently being enumerated; dwFlags is
                         :  * unused.  Returns FALSE once a match has been stored, which ends the
                         :  * enumeration, and TRUE otherwise so that enumeration continues.
     995                 :  */
     996                 : static BOOL CALLBACK
     997                 : search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
     998                 : {
     999                 :     wchar_t     test_locale[LOCALE_NAME_MAX_LENGTH];
    1000                 :     wchar_t   **argv;
    1001                 : 
    1002                 :     (void) (dwFlags);
    1003                 : 
    1004                 :     argv = (wchar_t **) lparam;
    1005                 :     *argv[2] = (wchar_t) 0;
    1006                 : 
    1007                 :     memset(test_locale, 0, sizeof(test_locale));
    1008                 : 
    1009                 :     /* Get the name of the <Language> in English */
    1010                 :     if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
    1011                 :                         test_locale, LOCALE_NAME_MAX_LENGTH))
    1012                 :     {
    1013                 :         /*
    1014                 :          * If the enumerated locale does not have a hyphen ("en") OR the
    1015                 :          * lc_messages input does not have an underscore ("English"), we only
    1016                 :          * need to compare the <Language> tags.
    1017                 :          */
    1018                 :         if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
    1019                 :         {
    1020                 :             if (_wcsicmp(argv[0], test_locale) == 0)
    1021                 :             {
    1022                 :                 wcscpy(argv[1], pStr);
    1023                 :                 *argv[2] = (wchar_t) 1;
    1024                 :                 return FALSE;
    1025                 :             }
    1026                 :         }
    1027                 : 
    1028                 :         /*
    1029                 :          * We have to compare a full <Language>_<Country> tag, so we append
    1030                 :          * the underscore and name of the country/region in English, e.g.
    1031                 :          * "English_United States".
    1032                 :          */
    1033                 :         else
    1034                 :         {
    1035                 :             size_t      len;
    1036                 : 
    1037                 :             wcscat(test_locale, L"_");
    1038                 :             len = wcslen(test_locale);
    1039                 :             if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
    1040                 :                                 test_locale + len,
    1041                 :                                 LOCALE_NAME_MAX_LENGTH - len))
    1042                 :             {
    1043                 :                 if (_wcsicmp(argv[0], test_locale) == 0)
    1044                 :                 {
    1045                 :                     wcscpy(argv[1], pStr);
    1046                 :                     *argv[2] = (wchar_t) 1;
    1047                 :                     return FALSE;
    1048                 :                 }
    1049                 :             }
    1050                 :         }
    1051                 :     }
    1052                 : 
    1053                 :     return TRUE;
    1054                 : }
    1055                 : 
    1056                 : /*
    1057                 :  * This function converts a Windows locale name to an ISO formatted version
    1058                 :  * for Visual Studio 2015 or greater.
    1059                 :  *
    1060                 :  * Returns NULL, if no valid conversion was found.
    1061                 :  */
    1062                 : static char *
    1063                 : get_iso_localename(const char *winlocname)
    1064                 : {
    1065                 :     wchar_t     wc_locale_name[LOCALE_NAME_MAX_LENGTH];
    1066                 :     wchar_t     buffer[LOCALE_NAME_MAX_LENGTH];
    1067                 :     static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
    1068                 :     char       *period;
    1069                 :     int         len;
    1070                 :     int         ret_val;
    1071                 : 
    1072                 :     /*
    1073                 :      * Valid locales have the following syntax:
    1074                 :      * <Language>[_<Country>[.<CodePage>]]
    1075                 :      *
    1076                 :      * GetLocaleInfoEx can only take locale name without code-page and for the
    1077                 :      * purpose of this API the code-page doesn't matter.
    1078                 :      */
    1079                 :     period = strchr(winlocname, '.');
    1080                 :     if (period != NULL)
    1081                 :         len = period - winlocname;
    1082                 :     else
    1083                 :         len = pg_mbstrlen(winlocname);
    1084                 : 
    1085                 :     memset(wc_locale_name, 0, sizeof(wc_locale_name));
    1086                 :     memset(buffer, 0, sizeof(buffer));
    1087                 :     MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
    1088                 :                         LOCALE_NAME_MAX_LENGTH);
    1089                 : 
    1090                 :     /*
    1091                 :      * If the lc_messages is already a Unix-style string, we have a direct
    1092                 :      * match with LOCALE_SNAME, e.g. en-US, en_US.
    1093                 :      */
    1094                 :     ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
    1095                 :                               LOCALE_NAME_MAX_LENGTH);
    1096                 :     if (!ret_val)
    1097                 :     {
    1098                 :         /*
    1099                 :          * Search for a locale in the system that matches language and country
    1100                 :          * name.
    1101                 :          */
    1102                 :         wchar_t    *argv[3];
    1103                 : 
    1104                 :         argv[0] = wc_locale_name;
    1105                 :         argv[1] = buffer;
    1106                 :         argv[2] = (wchar_t *) &ret_val;
    1107                 :         EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
    1108                 :                             NULL);
    1109                 :     }
    1110                 : 
    1111                 :     if (ret_val)
    1112                 :     {
    1113                 :         size_t      rc;
    1114                 :         char       *hyphen;
    1115                 : 
    1116                 :         /* Locale names use only ASCII, any conversion locale suffices. */
    1117                 :         rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
    1118                 :         if (rc == -1 || rc == sizeof(iso_lc_messages))
    1119                 :             return NULL;
    1120                 : 
    1121                 :         /*
    1122                 :          * Since the message catalogs sit on a case-insensitive filesystem, we
    1123                 :          * need not standardize letter case here.  So long as we do not ship
    1124                 :          * message catalogs for which it would matter, we also need not
    1125                 :          * translate the script/variant portion, e.g.  uz-Cyrl-UZ to
    1126                 :          * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
    1127                 :          */
    1128                 :         hyphen = strchr(iso_lc_messages, '-');
    1129                 :         if (hyphen)
    1130                 :             *hyphen = '_';
    1131                 :         return iso_lc_messages;
    1132                 :     }
    1133                 : 
    1134                 :     return NULL;
    1135                 : }
    1136                 : 
    1137                 : static char *
    1138                 : IsoLocaleName(const char *winlocname)
    1139                 : {
    1140                 :     static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
    1141                 : 
    1142                 :     if (pg_strcasecmp("c", winlocname) == 0 ||
    1143                 :         pg_strcasecmp("posix", winlocname) == 0)
    1144                 :     {
    1145                 :         strcpy(iso_lc_messages, "C");
    1146                 :         return iso_lc_messages;
    1147                 :     }
    1148                 :     else
    1149                 :         return get_iso_localename(winlocname);
    1150                 : }
    1151                 : 
    1152                 : #else                           /* !defined(_MSC_VER) */
    1153                 : 
    1154                 : static char *
    1155                 : IsoLocaleName(const char *winlocname)
    1156                 : {
    1157                 :     return NULL;                /* Not supported on MinGW */
    1158                 : }
    1159                 : 
    1160                 : #endif                          /* defined(_MSC_VER) */
    1161                 : 
    1162                 : #endif                          /* WIN32 && LC_MESSAGES */
    1163                 : 
    1164                 : 
    1165                 : /*
    1166                 :  * Detect aging strxfrm() implementations that, in a subset of locales, write
    1167                 :  * past the specified buffer length.  Affected users must update OS packages
    1168                 :  * before using PostgreSQL 9.5 or later.
    1169                 :  *
    1170                 :  * Assume that the bug can come and go from one postmaster startup to another
    1171                 :  * due to physical replication among diverse machines.  Assume that the bug's
    1172                 :  * presence will not change during the life of a particular postmaster.  Given
    1173                 :  * those assumptions, call this no less than once per postmaster startup per
    1174                 :  * LC_COLLATE setting used.  No known-affected system offers strxfrm_l(), so
    1175                 :  * there is no need to consider pg_collation locales.
    1176                 :  */
    1177                 : void
    1178 GIC       12783 : check_strxfrm_bug(void)
    1179                 : {
    1180                 :     char        buf[32];
    1181           12783 :     const int   canary = 0x7F;
    1182 CBC       12783 :     bool        ok = true;
    1183 ECB             : 
    1184                 :     /*
    1185 EUB             :      * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
    1186                 :      * 05/08 returns 18 and modifies 10 bytes.  It respects limits above or
    1187                 :      * below that range.
    1188                 :      *
    1189                 :      * The bug is present in Solaris 8 as well; it is absent in Solaris 10
    1190                 :      * 01/13 and Solaris 11.2.  Affected locales include is_IS.ISO8859-1,
    1191                 :      * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R.  Unaffected locales
    1192                 :      * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
    1193                 :      */
    1194 GIC       12783 :     buf[7] = canary;
    1195           12783 :     (void) strxfrm(buf, "ab", 7);
    1196           12783 :     if (buf[7] != canary)
    1197 LBC           0 :         ok = false;
    1198 ECB             : 
    1199                 :     /*
    1200 EUB             :      * illumos bug #1594 was present in the source tree from 2010-10-11 to
    1201                 :      * 2012-02-01.  Given an ASCII string of any length and length limit 1,
    1202 ECB             :      * affected systems ignore the length limit and modify a number of bytes
    1203 EUB             :      * one less than the return value.  The problem inputs for this bug do not
    1204                 :      * overlap those for the Solaris bug, hence a distinct test.
    1205                 :      *
    1206                 :      * Affected systems include smartos-20110926T021612Z.  Affected locales
    1207                 :      * include en_US.ISO8859-1 and en_US.UTF-8.  Unaffected locales include C.
    1208 ECB             :      */
    1209 GIC       12783 :     buf[1] = canary;
    1210           12783 :     (void) strxfrm(buf, "a", 1);
    1211           12783 :     if (buf[1] != canary)
    1212 UIC           0 :         ok = false;
    1213                 : 
    1214 GIC       12783 :     if (!ok)
    1215 UIC           0 :         ereport(ERROR,
    1216                 :                 (errcode(ERRCODE_SYSTEM_ERROR),
    1217                 :                  errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
    1218                 :                                  setlocale(LC_COLLATE, NULL)),
    1219                 :                  errhint("Apply system library package updates.")));
    1220 GIC       12783 : }
    1221                 : 
    1222                 : 
    1223                 : /*
    1224                 :  * Cache mechanism for collation information.
    1225                 :  *
    1226                 :  * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
    1227                 :  * (or POSIX), so we can optimize a few code paths in various places.
    1228                 :  * For the built-in C and POSIX collations, we can know that without even
    1229                 :  * doing a cache lookup, but we want to support aliases for C/POSIX too.
    1230                 :  * For the "default" collation, there are separate static cache variables,
    1231                 :  * since consulting the pg_collation catalog doesn't tell us what we need.
    1232                 :  *
    1233                 :  * Also, if a pg_locale_t has been requested for a collation, we cache that
    1234                 :  * for the life of a backend.
    1235                 :  *
    1236 ECB             :  * Note that some code relies on the flags not reporting false negatives
    1237                 :  * (that is, saying it's not C when it is).  For example, char2wchar()
    1238                 :  * could fail if the locale is C, so str_tolower() shouldn't call it
    1239                 :  * in that case.
    1240                 :  *
    1241                 :  * Note that we currently lack any way to flush the cache.  Since we don't
    1242                 :  * support ALTER COLLATION, this is OK.  The worst case is that someone
    1243                 :  * drops a collation, and a useless cache entry hangs around in existing
    1244                 :  * backends.
    1245                 :  */
    1246                 : 
    1247                 : static collation_cache_entry *
    1248 GIC       21478 : lookup_collation_cache(Oid collation, bool set_flags)
    1249 ECB             : {
    1250                 :     collation_cache_entry *cache_entry;
    1251                 :     bool        found;
    1252                 : 
    1253 GIC       21478 :     Assert(OidIsValid(collation));
    1254           21478 :     Assert(collation != DEFAULT_COLLATION_OID);
    1255 ECB             : 
    1256 CBC       21478 :     if (collation_cache == NULL)
    1257                 :     {
    1258                 :         /* First time through, initialize the hash table */
    1259                 :         HASHCTL     ctl;
    1260                 : 
    1261 GIC          23 :         ctl.keysize = sizeof(Oid);
    1262 CBC          23 :         ctl.entrysize = sizeof(collation_cache_entry);
    1263              23 :         collation_cache = hash_create("Collation cache", 100, &ctl,
    1264                 :                                       HASH_ELEM | HASH_BLOBS);
    1265                 :     }
    1266 ECB             : 
    1267 GIC       21478 :     cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
    1268           21478 :     if (!found)
    1269                 :     {
    1270                 :         /*
    1271                 :          * Make sure cache entry is marked invalid, in case we fail before
    1272 ECB             :          * setting things.
    1273                 :          */
    1274 GBC         148 :         cache_entry->flags_valid = false;
    1275 CBC         148 :         cache_entry->locale = 0;
    1276                 :     }
    1277 ECB             : 
    1278 GIC       21478 :     if (set_flags && !cache_entry->flags_valid)
    1279                 :     {
    1280                 :         /* Attempt to set the flags */
    1281                 :         HeapTuple   tp;
    1282                 :         Form_pg_collation collform;
    1283 ECB             : 
    1284 CBC         148 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
    1285             148 :         if (!HeapTupleIsValid(tp))
    1286 LBC           0 :             elog(ERROR, "cache lookup failed for collation %u", collation);
    1287 GIC         148 :         collform = (Form_pg_collation) GETSTRUCT(tp);
    1288 ECB             : 
    1289 CBC         148 :         if (collform->collprovider == COLLPROVIDER_LIBC)
    1290 ECB             :         {
    1291                 :             Datum       datum;
    1292                 :             const char *collcollate;
    1293                 :             const char *collctype;
    1294                 : 
    1295 GNC          56 :             datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
    1296 GIC          56 :             collcollate = TextDatumGetCString(datum);
    1297 GNC          56 :             datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
    1298 CBC          56 :             collctype = TextDatumGetCString(datum);
    1299                 : 
    1300 GIC          87 :             cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
    1301 CBC          31 :                                          (strcmp(collcollate, "POSIX") == 0));
    1302 GIC          87 :             cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
    1303              31 :                                        (strcmp(collctype, "POSIX") == 0));
    1304                 :         }
    1305                 :         else
    1306                 :         {
    1307              92 :             cache_entry->collate_is_c = false;
    1308              92 :             cache_entry->ctype_is_c = false;
    1309 ECB             :         }
    1310                 : 
    1311 GIC         148 :         cache_entry->flags_valid = true;
    1312                 : 
    1313             148 :         ReleaseSysCache(tp);
    1314                 :     }
    1315 ECB             : 
    1316 GBC       21478 :     return cache_entry;
    1317                 : }
    1318                 : 
    1319                 : 
    1320                 : /*
    1321                 :  * Detect whether collation's LC_COLLATE property is C
    1322 ECB             :  */
    1323                 : bool
    1324 GIC     9450429 : lc_collate_is_c(Oid collation)
    1325                 : {
    1326                 :     /*
    1327 ECB             :      * If we're asked about "collation 0", return false, so that the code will
    1328                 :      * go into the non-C path and report that the collation is bogus.
    1329                 :      */
    1330 CBC     9450429 :     if (!OidIsValid(collation))
    1331 LBC           0 :         return false;
    1332 ECB             : 
    1333                 :     /*
    1334 EUB             :      * If we're asked about the default collation, we have to inquire of the C
    1335                 :      * library.  Cache the result so we only have to compute it once.
    1336 ECB             :      */
    1337 CBC     9450429 :     if (collation == DEFAULT_COLLATION_OID)
    1338 ECB             :     {
    1339 EUB             :         static int  result = -1;
    1340                 :         char       *localeptr;
    1341 ECB             : 
    1342 CBC     6418465 :         if (default_locale.provider == COLLPROVIDER_ICU)
    1343 GIC     6412117 :             return false;
    1344                 : 
    1345            6348 :         if (result >= 0)
    1346            6340 :             return (bool) result;
    1347               8 :         localeptr = setlocale(LC_COLLATE, NULL);
    1348 CBC           8 :         if (!localeptr)
    1349 UIC           0 :             elog(ERROR, "invalid LC_COLLATE setting");
    1350 ECB             : 
    1351 GIC           8 :         if (strcmp(localeptr, "C") == 0)
    1352               1 :             result = true;
    1353               7 :         else if (strcmp(localeptr, "POSIX") == 0)
    1354 UIC           0 :             result = true;
    1355 ECB             :         else
    1356 GIC           7 :             result = false;
    1357               8 :         return (bool) result;
    1358                 :     }
    1359                 : 
    1360                 :     /*
    1361                 :      * If we're asked about the built-in C/POSIX collations, we know that.
    1362 ECB             :      */
    1363 GIC     3031964 :     if (collation == C_COLLATION_OID ||
    1364                 :         collation == POSIX_COLLATION_OID)
    1365         3022864 :         return true;
    1366                 : 
    1367                 :     /*
    1368 ECB             :      * Otherwise, we have to consult pg_collation, but we cache that.
    1369 EUB             :      */
    1370 GIC        9100 :     return (lookup_collation_cache(collation, true))->collate_is_c;
    1371                 : }
    1372                 : 
    1373                 : /*
    1374                 :  * Detect whether collation's LC_CTYPE property is C
    1375 ECB             :  */
    1376                 : bool
    1377 GIC     2989314 : lc_ctype_is_c(Oid collation)
    1378                 : {
    1379                 :     /*
    1380 ECB             :      * If we're asked about "collation 0", return false, so that the code will
    1381                 :      * go into the non-C path and report that the collation is bogus.
    1382                 :      */
    1383 CBC     2989314 :     if (!OidIsValid(collation))
    1384 LBC           0 :         return false;
    1385 ECB             : 
    1386                 :     /*
    1387 EUB             :      * If we're asked about the default collation, we have to inquire of the C
    1388                 :      * library.  Cache the result so we only have to compute it once.
    1389 ECB             :      */
    1390 GBC     2989314 :     if (collation == DEFAULT_COLLATION_OID)
    1391 ECB             :     {
    1392 EUB             :         static int  result = -1;
    1393                 :         char       *localeptr;
    1394 ECB             : 
    1395 CBC     1560268 :         if (default_locale.provider == COLLPROVIDER_ICU)
    1396 GIC     1560262 :             return false;
    1397                 : 
    1398               6 :         if (result >= 0)
    1399               3 :             return (bool) result;
    1400               3 :         localeptr = setlocale(LC_CTYPE, NULL);
    1401 CBC           3 :         if (!localeptr)
    1402 UIC           0 :             elog(ERROR, "invalid LC_CTYPE setting");
    1403 ECB             : 
    1404 GIC           3 :         if (strcmp(localeptr, "C") == 0)
    1405 UIC           0 :             result = true;
    1406 GIC           3 :         else if (strcmp(localeptr, "POSIX") == 0)
    1407 UIC           0 :             result = true;
    1408 ECB             :         else
    1409 GIC           3 :             result = false;
    1410               3 :         return (bool) result;
    1411                 :     }
    1412                 : 
    1413                 :     /*
    1414 ECB             :      * If we're asked about the built-in C/POSIX collations, we know that.
    1415                 :      */
    1416 GIC     1429046 :     if (collation == C_COLLATION_OID ||
    1417                 :         collation == POSIX_COLLATION_OID)
    1418         1427352 :         return true;
    1419                 : 
    1420                 :     /*
    1421 ECB             :      * Otherwise, we have to consult pg_collation, but we cache that.
    1422                 :      */
    1423 GIC        1694 :     return (lookup_collation_cache(collation, true))->ctype_is_c;
    1424                 : }
    1425                 : 
    1426                 : struct pg_locale_struct default_locale;
    1427 ECB             : 
    1428                 : void
    1429 GIC       10206 : make_icu_collator(const char *iculocstr,
    1430                 :                   const char *icurules,
    1431                 :                   struct pg_locale_struct *resultp)
    1432                 : {
    1433                 : #ifdef USE_ICU
    1434                 :     UCollator  *collator;
    1435 ECB             : 
    1436 GNC       10206 :     collator = pg_ucol_open(iculocstr);
    1437 ECB             : 
    1438                 :     /*
    1439                 :      * If rules are specified, we extract the rules of the standard collation,
    1440                 :      * add our own rules, and make a new collator with the combined rules.
    1441                 :      */
    1442 GNC       10204 :     if (icurules)
    1443                 :     {
    1444                 :         const UChar *default_rules;
    1445                 :         UChar      *agg_rules;
    1446                 :         UChar      *my_rules;
    1447                 :         UErrorCode  status;
    1448                 :         int32_t     length;
    1449                 : 
    1450               6 :         default_rules = ucol_getRules(collator, &length);
    1451               6 :         icu_to_uchar(&my_rules, icurules, strlen(icurules));
    1452                 : 
    1453               6 :         agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1);
    1454               6 :         u_strcpy(agg_rules, default_rules);
    1455               6 :         u_strcat(agg_rules, my_rules);
    1456                 : 
    1457               6 :         ucol_close(collator);
    1458                 : 
    1459               6 :         status = U_ZERO_ERROR;
    1460               6 :         collator = ucol_openRules(agg_rules, u_strlen(agg_rules),
    1461                 :                                   UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
    1462               6 :         if (U_FAILURE(status))
    1463               3 :             ereport(ERROR,
    1464                 :                     (errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
    1465                 :                             iculocstr, icurules, u_errorName(status))));
    1466                 :     }
    1467 ECB             : 
    1468                 :     /* We will leak this string if the caller errors later :-( */
    1469 CBC       10201 :     resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
    1470           10201 :     resultp->info.icu.ucol = collator;
    1471                 : #else                           /* not USE_ICU */
    1472                 :     /* could get here if a collation was created by a build with ICU */
    1473                 :     ereport(ERROR,
    1474                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1475                 :              errmsg("ICU is not supported in this build")));
    1476 ECB             : #endif                          /* not USE_ICU */
    1477 CBC       10201 : }
    1478                 : 
    1479                 : 
    1480                 : /* simple subroutine for reporting errors from newlocale() */
    1481                 : #ifdef HAVE_LOCALE_T
    1482                 : static void
    1483 GIC           3 : report_newlocale_failure(const char *localename)
    1484 ECB             : {
    1485                 :     int         save_errno;
    1486                 : 
    1487                 :     /*
    1488                 :      * Windows doesn't provide any useful error indication from
    1489                 :      * _create_locale(), and BSD-derived platforms don't seem to feel they
    1490                 :      * need to set errno either (even though POSIX is pretty clear that
    1491                 :      * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
    1492                 :      * is what to report.
    1493                 :      */
    1494 GIC           3 :     if (errno == 0)
    1495               3 :         errno = ENOENT;
    1496                 : 
    1497                 :     /*
    1498                 :      * ENOENT means "no such locale", not "no such file", so clarify that
    1499                 :      * errno with an errdetail message.
    1500                 :      */
    1501 CBC           3 :     save_errno = errno;         /* auxiliary funcs might change errno */
    1502               3 :     ereport(ERROR,
    1503                 :             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1504                 :              errmsg("could not create locale \"%s\": %m",
    1505                 :                     localename),
    1506                 :              (save_errno == ENOENT ?
    1507                 :               errdetail("The operating system could not find any locale data for the locale name \"%s\".",
    1508 ECB             :                         localename) : 0)));
    1509                 : }
    1510                 : #endif                          /* HAVE_LOCALE_T */
    1511                 : 
    1512                 : bool
    1513 GNC     5377091 : pg_locale_deterministic(pg_locale_t locale)
    1514                 : {
    1515                 :     /* default locale must always be deterministic */
    1516         5377091 :     if (locale == NULL)
    1517          296865 :         return true;
    1518                 :     else
    1519         5080226 :         return locale->deterministic;
    1520                 : }
    1521                 : 
    1522                 : /*
    1523                 :  * Create a locale_t from a collation OID.  Results are cached for the
    1524                 :  * lifetime of the backend.  Thus, do not free the result with freelocale().
    1525                 :  *
    1526                 :  * As a special optimization, the default/database collation returns 0.
    1527                 :  * Callers should then revert to the non-locale_t-enabled code path.
    1528                 :  * Also, callers should avoid calling this before going down a C/POSIX
    1529 ECB             :  * fastpath, because such a fastpath should work even on platforms without
    1530                 :  * locale_t support in the C library.
    1531                 :  *
    1532                 :  * For simplicity, we always generate COLLATE + CTYPE even though we
    1533                 :  * might only need one of them.  Since this is called only once per session,
    1534                 :  * it shouldn't cost much.
    1535                 :  */
    1536                 : pg_locale_t
    1537 GIC     7986982 : pg_newlocale_from_collation(Oid collid)
    1538                 : {
    1539                 :     collation_cache_entry *cache_entry;
    1540                 : 
    1541                 :     /* Callers must pass a valid OID */
    1542         7986982 :     Assert(OidIsValid(collid));
    1543                 : 
    1544         7986982 :     if (collid == DEFAULT_COLLATION_OID)
    1545                 :     {
    1546         7976298 :         if (default_locale.provider == COLLPROVIDER_ICU)
    1547         7969945 :             return &default_locale;
    1548                 :         else
    1549            6353 :             return (pg_locale_t) 0;
    1550                 :     }
    1551                 : 
    1552           10684 :     cache_entry = lookup_collation_cache(collid, false);
    1553 ECB             : 
    1554 GIC       10684 :     if (cache_entry->locale == 0)
    1555                 :     {
    1556                 :         /* We haven't computed this yet in this session, so do it */
    1557                 :         HeapTuple   tp;
    1558 ECB             :         Form_pg_collation collform;
    1559                 :         struct pg_locale_struct result;
    1560                 :         pg_locale_t resultp;
    1561                 :         Datum       datum;
    1562                 :         bool        isnull;
    1563                 : 
    1564 GIC         113 :         tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
    1565 CBC         113 :         if (!HeapTupleIsValid(tp))
    1566 UIC           0 :             elog(ERROR, "cache lookup failed for collation %u", collid);
    1567 GIC         113 :         collform = (Form_pg_collation) GETSTRUCT(tp);
    1568 ECB             : 
    1569                 :         /* We'll fill in the result struct locally before allocating memory */
    1570 CBC         113 :         memset(&result, 0, sizeof(result));
    1571 GIC         113 :         result.provider = collform->collprovider;
    1572             113 :         result.deterministic = collform->collisdeterministic;
    1573                 : 
    1574             113 :         if (collform->collprovider == COLLPROVIDER_LIBC)
    1575                 :         {
    1576                 : #ifdef HAVE_LOCALE_T
    1577                 :             const char *collcollate;
    1578                 :             const char *collctype pg_attribute_unused();
    1579                 :             locale_t    loc;
    1580 ECB             : 
    1581 GNC          21 :             datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
    1582 CBC          21 :             collcollate = TextDatumGetCString(datum);
    1583 GNC          21 :             datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
    1584 CBC          21 :             collctype = TextDatumGetCString(datum);
    1585 ECB             : 
    1586 CBC          21 :             if (strcmp(collcollate, collctype) == 0)
    1587                 :             {
    1588 ECB             :                 /* Normal case where they're the same */
    1589 GIC          21 :                 errno = 0;
    1590                 : #ifndef WIN32
    1591              21 :                 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
    1592                 :                                 NULL);
    1593                 : #else
    1594                 :                 loc = _create_locale(LC_ALL, collcollate);
    1595 ECB             : #endif
    1596 CBC          21 :                 if (!loc)
    1597               3 :                     report_newlocale_failure(collcollate);
    1598 ECB             :             }
    1599                 :             else
    1600                 :             {
    1601                 : #ifndef WIN32
    1602                 :                 /* We need two newlocale() steps */
    1603                 :                 locale_t    loc1;
    1604                 : 
    1605 LBC           0 :                 errno = 0;
    1606 UIC           0 :                 loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
    1607               0 :                 if (!loc1)
    1608               0 :                     report_newlocale_failure(collcollate);
    1609               0 :                 errno = 0;
    1610 LBC           0 :                 loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
    1611               0 :                 if (!loc)
    1612 UIC           0 :                     report_newlocale_failure(collctype);
    1613                 : #else
    1614                 : 
    1615                 :                 /*
    1616                 :                  * XXX The _create_locale() API doesn't appear to support
    1617                 :                  * this. Could perhaps be worked around by changing
    1618                 :                  * pg_locale_t to contain two separate fields.
    1619 EUB             :                  */
    1620                 :                 ereport(ERROR,
    1621                 :                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1622                 :                          errmsg("collations with different collate and ctype values are not supported on this platform")));
    1623                 : #endif
    1624                 :             }
    1625                 : 
    1626 GBC          18 :             result.info.lt = loc;
    1627                 : #else                           /* not HAVE_LOCALE_T */
    1628                 :             /* platform that doesn't support locale_t */
    1629                 :             ereport(ERROR,
    1630                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    1631                 :                      errmsg("collation provider LIBC is not supported on this platform")));
    1632                 : #endif                          /* not HAVE_LOCALE_T */
    1633                 :         }
    1634 GIC          92 :         else if (collform->collprovider == COLLPROVIDER_ICU)
    1635                 :         {
    1636                 :             const char *iculocstr;
    1637                 :             const char *icurules;
    1638                 : 
    1639 GNC          92 :             datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colliculocale);
    1640 CBC          92 :             iculocstr = TextDatumGetCString(datum);
    1641                 : 
    1642 GNC          92 :             datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
    1643              92 :             if (!isnull)
    1644               6 :                 icurules = TextDatumGetCString(datum);
    1645                 :             else
    1646              86 :                 icurules = NULL;
    1647                 : 
    1648              92 :             make_icu_collator(iculocstr, icurules, &result);
    1649                 :         }
    1650                 : 
    1651 GIC         107 :         datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
    1652                 :                                 &isnull);
    1653             107 :         if (!isnull)
    1654                 :         {
    1655 ECB             :             char       *actual_versionstr;
    1656                 :             char       *collversionstr;
    1657                 : 
    1658 GIC         104 :             collversionstr = TextDatumGetCString(datum);
    1659                 : 
    1660 GNC         104 :             datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate);
    1661                 : 
    1662 CBC         104 :             actual_versionstr = get_collation_actual_version(collform->collprovider,
    1663             104 :                                                              TextDatumGetCString(datum));
    1664             104 :             if (!actual_versionstr)
    1665                 :             {
    1666 ECB             :                 /*
    1667                 :                  * This could happen when specifying a version in CREATE
    1668                 :                  * COLLATION but the provider does not support versioning, or
    1669                 :                  * manually creating a mess in the catalogs.
    1670                 :                  */
    1671 LBC           0 :                 ereport(ERROR,
    1672                 :                         (errmsg("collation \"%s\" has no actual version, but a version was recorded",
    1673 ECB             :                                 NameStr(collform->collname))));
    1674                 :             }
    1675                 : 
    1676 GIC         104 :             if (strcmp(actual_versionstr, collversionstr) != 0)
    1677 UIC           0 :                 ereport(WARNING,
    1678 ECB             :                         (errmsg("collation \"%s\" has version mismatch",
    1679                 :                                 NameStr(collform->collname)),
    1680                 :                          errdetail("The collation in the database was created using version %s, "
    1681                 :                                    "but the operating system provides version %s.",
    1682                 :                                    collversionstr, actual_versionstr),
    1683                 :                          errhint("Rebuild all objects affected by this collation and run "
    1684                 :                                  "ALTER COLLATION %s REFRESH VERSION, "
    1685                 :                                  "or build PostgreSQL with the right library version.",
    1686                 :                                  quote_qualified_identifier(get_namespace_name(collform->collnamespace),
    1687                 :                                                             NameStr(collform->collname)))));
    1688                 :         }
    1689                 : 
    1690 GIC         107 :         ReleaseSysCache(tp);
    1691 EUB             : 
    1692                 :         /* We'll keep the pg_locale_t structures in TopMemoryContext */
    1693 GIC         107 :         resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
    1694             107 :         *resultp = result;
    1695                 : 
    1696 CBC         107 :         cache_entry->locale = resultp;
    1697 EUB             :     }
    1698                 : 
    1699 GIC       10678 :     return cache_entry->locale;
    1700                 : }
    1701                 : 
    1702                 : /*
    1703                 :  * Get provider-specific collation version string for the given collation from
    1704                 :  * the operating system/library.
                         :  *
                         :  * collprovider is compared against COLLPROVIDER_ICU and COLLPROVIDER_LIBC;
                         :  * collcollate is the locale name handed to that provider.
                         :  *
                         :  * For ICU (USE_ICU builds) the version comes from ucol_getVersion() on a
                         :  * collator opened with pg_ucol_open().  For libc it is the glibc version,
                         :  * the FreeBSD querylocale() collation version, or the Windows NLS version,
                         :  * depending on which #if branch is compiled in.
                         :  *
                         :  * Returns a string built by pstrdup()/psprintf(), or NULL when no version
                         :  * is available: the libc locales "C", "C.*" and "POSIX" (compared
                         :  * case-insensitively), a provider or platform matched by none of the
                         :  * branches, and Windows locale names that GetNLSVersionEx() rejects with
                         :  * ERROR_INVALID_PARAMETER.  Other lookup failures raise an ERROR.
    1705                 :  */
    1706                 : char *
    1707          452360 : get_collation_actual_version(char collprovider, const char *collcollate)
    1708                 : {
    1709          452360 :     char       *collversion = NULL;
    1710 ECB             : 
    1711                 : #ifdef USE_ICU
    1712 GIC      452360 :     if (collprovider == COLLPROVIDER_ICU)
    1713 ECB             :     {
    1714                 :         UCollator  *collator;
    1715                 :         UVersionInfo versioninfo;
    1716                 :         char        buf[U_MAX_VERSION_STRING_LENGTH];
    1717                 : 
    1718 GNC      255271 :         collator = pg_ucol_open(collcollate);
    1719                 : 
    1720 GIC      255271 :         ucol_getVersion(collator, versioninfo);
    1721          255271 :         ucol_close(collator);
    1722 ECB             : 
    1723 GIC      255271 :         u_versionToString(versioninfo, buf);
    1724 CBC      255271 :         collversion = pstrdup(buf);
    1725                 :     }
    1726                 :     else
    1727 ECB             : #endif
    1728 GIC      394178 :         if (collprovider == COLLPROVIDER_LIBC &&
    1729          394145 :             pg_strcasecmp("C", collcollate) != 0 &&
    1730          393506 :             pg_strncasecmp("C.", collcollate, 2) != 0 &&
    1731          196450 :             pg_strcasecmp("POSIX", collcollate) != 0)
    1732                 :     {
    1733 ECB             : #if defined(__GLIBC__)
    1734                 :         /* Use the glibc version because we don't have anything better. */
    1735 CBC      196438 :         collversion = pstrdup(gnu_get_libc_version());
    1736 ECB             : #elif defined(LC_VERSION_MASK)
    1737                 :         locale_t    loc;
    1738                 : 
    1739                 :         /* Look up FreeBSD collation version; newlocale() takes an LC_*_MASK. */
    1740                 :         loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
    1741                 :         if (loc)
    1742                 :         {
    1743                 :             collversion =
    1744                 :                 pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
    1745                 :             freelocale(loc);
    1746                 :         }
    1747                 :         else
    1748                 :             ereport(ERROR,
    1749                 :                     (errmsg("could not load locale \"%s\"", collcollate)));
    1750                 : #elif defined(WIN32)
    1751                 :         /*
    1752                 :          * If we are targeting Windows Vista and above, we can ask for a version
    1753                 :          * given a collation name (earlier versions required a location code
    1754                 :          * that we don't have).
    1755                 :          */
    1756                 :         NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
    1757                 :         WCHAR       wide_collcollate[LOCALE_NAME_MAX_LENGTH];
    1758                 : 
    1759                 :         MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
    1760                 :                             LOCALE_NAME_MAX_LENGTH);
    1761                 :         if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
    1762                 :         {
    1763                 :             /*
    1764                 :              * GetNLSVersionEx() wants a language tag such as "en-US", not a
    1765                 :              * locale name like "English_United States.1252".  Until those
    1766                 :              * values can be prevented from entering the system, or 100%
    1767                 :              * reliably converted to the more useful tag format, tolerate the
    1768                 :              * resulting error and report that we have no version data.
    1769                 :              */
    1770                 :             if (GetLastError() == ERROR_INVALID_PARAMETER)
    1771                 :                 return NULL;
    1772                 : 
    1773                 :             ereport(ERROR,
    1774                 :                     (errmsg("could not get collation version for locale \"%s\": error code %lu",
    1775                 :                             collcollate,
    1776                 :                             GetLastError())));
    1777                 :         }
    1778                 :         collversion = psprintf("%lu.%lu,%lu.%lu",
    1779                 :                                (version.dwNLSVersion >> 8) & 0xFFFF,
    1780                 :                                version.dwNLSVersion & 0xFF,
    1781                 :                                (version.dwDefinedVersion >> 8) & 0xFFFF,
    1782                 :                                version.dwDefinedVersion & 0xFF);
    1783                 : #endif
    1784                 :     }
    1785                 : 
    1786 GIC      452360 :     return collversion;
    1787                 : }
    1788                 : 
    1789                 : /*
    1790                 :  * pg_strncoll_libc_win32_utf8
    1791                 :  *
    1792                 :  * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
    1793                 :  * invoke wcscoll() or wcscoll_l().
                         :  *
                         :  * arg1/arg2 are passed with explicit byte lengths len1/len2; the wide
                         :  * terminators are written here after conversion.  Both converted strings
                         :  * share one buffer, with len * 2 + 2 bytes reserved per argument: the
                         :  * on-stack sbuf when the total fits in TEXTBUFLEN, otherwise a palloc'd
                         :  * buffer that is pfree'd before returning.  A zero-length argument skips
                         :  * MultiByteToWideChar() and is compared as an empty wide string.
                         :  *
                         :  * wcscoll_l() with locale->info.lt is used when a locale is given and
                         :  * HAVE_LOCALE_T is defined, wcscoll() otherwise.  An ERROR is raised if
                         :  * either conversion fails or the comparison returns 2147483647.
                         :  *
                         :  * NOTE(review): the "#ifndef WIN32 ... Assert(false)" lines in the body can
                         :  * never be compiled in, because the whole function sits inside #ifdef WIN32.
                         :  * NOTE(review): a1len/a2len are int while len1/len2 are size_t; presumably
                         :  * callers never pass inputs near INT_MAX / 2 bytes -- confirm.
    1794                 :  */
    1795                 : #ifdef WIN32
    1796                 : static int
    1797                 : pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
    1798                 :                             size_t len2, pg_locale_t locale)
    1799                 : {
    1800                 :     char        sbuf[TEXTBUFLEN];
    1801                 :     char       *buf = sbuf;
    1802                 :     char       *a1p,
    1803                 :                *a2p;
    1804                 :     int         a1len = len1 * 2 + 2;
    1805                 :     int         a2len = len2 * 2 + 2;
    1806                 :     int         r;
    1807                 :     int         result;
    1808                 : 
    1809                 :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    1810                 :     Assert(GetDatabaseEncoding() == PG_UTF8);
    1811                 : #ifndef WIN32
    1812                 :     Assert(false);
    1813                 : #endif
    1814                 : 
    1815                 :     if (a1len + a2len > TEXTBUFLEN)
    1816                 :         buf = palloc(a1len + a2len);
    1817                 : 
    1818                 :     a1p = buf;
    1819                 :     a2p = buf + a1len;
    1820                 : 
    1821                 :     /* API does not work for zero-length input */
    1822                 :     if (len1 == 0)
    1823                 :         r = 0;
    1824                 :     else
    1825                 :     {
    1826                 :         r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
    1827                 :                                 (LPWSTR) a1p, a1len / 2);
    1828                 :         if (!r)
    1829                 :             ereport(ERROR,
    1830                 :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1831                 :                             GetLastError())));
    1832                 :     }
    1833                 :     ((LPWSTR) a1p)[r] = 0;
    1834                 : 
    1835                 :     if (len2 == 0)
    1836                 :         r = 0;
    1837                 :     else
    1838                 :     {
    1839                 :         r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
    1840                 :                                 (LPWSTR) a2p, a2len / 2);
    1841                 :         if (!r)
    1842                 :             ereport(ERROR,
    1843                 :                     (errmsg("could not convert string to UTF-16: error code %lu",
    1844                 :                             GetLastError())));
    1845                 :     }
    1846                 :     ((LPWSTR) a2p)[r] = 0;
    1847                 : 
    1848                 :     errno = 0;
    1849                 : #ifdef HAVE_LOCALE_T
    1850                 :     if (locale)
    1851                 :         result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
    1852                 :     else
    1853                 : #endif
    1854                 :         result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
    1855                 :     if (result == 2147483647)   /* _NLSCMPERROR; missing from mingw
    1856                 :                                  * headers */
    1857                 :         ereport(ERROR,
    1858                 :                 (errmsg("could not compare Unicode strings: %m")));
    1859                 : 
    1860                 :     if (buf != sbuf)
    1861                 :         pfree(buf);
    1862                 : 
    1863                 :     return result;
    1864                 : }
    1865                 : #endif                          /* WIN32 */
    1866                 : 
    1867                 : /*
    1868                 :  * pg_strcoll_libc
    1869                 :  *
    1870                 :  * Call strcoll(), strcoll_l(), wcscoll(), or wcscoll_l() as appropriate for
    1871                 :  * the given locale, platform, and database encoding. If the locale is NULL,
    1872                 :  * use the database collation.
    1873                 :  *
    1874                 :  * Arguments must be encoded in the database encoding and nul-terminated.
                         :  *
                         :  * Returns the comparison result of whichever function was selected.  On
                         :  * WIN32 with a UTF8 database encoding the work is handed to
                         :  * pg_strncoll_libc_win32_utf8() using strlen() of each argument.  Otherwise
                         :  * a non-NULL locale is compared with strcoll_l() on locale->info.lt (an
                         :  * ERROR is raised instead in builds without HAVE_LOCALE_T), and a NULL
                         :  * locale falls back to plain strcoll().
    1875                 :  */
    1876                 : static int
    1877 GNC         975 : pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
    1878                 : {
    1879                 :     int result;
    1880                 : 
    1881             975 :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    1882                 : #ifdef WIN32
    1883                 :     if (GetDatabaseEncoding() == PG_UTF8)
    1884                 :     {
    1885                 :         size_t len1 = strlen(arg1);
    1886                 :         size_t len2 = strlen(arg2);
    1887                 :         result = pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
    1888                 :     }
    1889                 :     else
    1890                 : #endif                          /* WIN32 */
    1891             975 :     if (locale)
    1892                 :     {
    1893                 : #ifdef HAVE_LOCALE_T
    1894             975 :         result = strcoll_l(arg1, arg2, locale->info.lt);
    1895                 : #else
    1896                 :         /* shouldn't happen */
    1897                 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    1898                 : #endif
    1899                 :     }
    1900                 :     else
    1901 UNC           0 :         result = strcoll(arg1, arg2);
    1902                 : 
    1903 GNC         975 :     return result;
    1904                 : }
    1905                 : 
    1906                 : /*
    1907                 :  * pg_strncoll_libc
    1908                 :  *
    1909                 :  * Nul-terminate the arguments and call pg_strcoll_libc().
                         :  *
                         :  * arg1/arg2 are given with explicit byte lengths len1/len2.  Terminated
                         :  * copies are placed back to back in a single buffer: the on-stack sbuf when
                         :  * len1 + len2 + 2 fits in TEXTBUFLEN, otherwise a palloc'd buffer that is
                         :  * pfree'd before returning.  The return value is whatever
                         :  * pg_strcoll_libc() reports for the copies.
                         :  *
                         :  * On WIN32 with a UTF8 database encoding the call goes straight to
                         :  * pg_strncoll_libc_win32_utf8(), which takes lengths, so no copy is made.
    1910                 :  */
    1911                 : static int
    1912             252 : pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
    1913                 :                  pg_locale_t locale)
    1914                 : {
    1915                 :     char     sbuf[TEXTBUFLEN];
    1916             252 :     char    *buf      = sbuf;
    1917             252 :     size_t   bufsize1 = len1 + 1;
    1918             252 :     size_t   bufsize2 = len2 + 1;
    1919                 :     char    *arg1n;
    1920                 :     char    *arg2n;
    1921                 :     int      result;
    1922                 : 
    1923             252 :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    1924                 : 
    1925                 : #ifdef WIN32
    1926                 :     /* check for this case before doing the work for nul-termination */
    1927                 :     if (GetDatabaseEncoding() == PG_UTF8)
    1928                 :         return pg_strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
    1929                 : #endif                          /* WIN32 */
    1930                 : 
    1931             252 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
    1932 UNC           0 :         buf = palloc(bufsize1 + bufsize2);
    1933                 : 
    1934 GNC         252 :     arg1n = buf;
    1935             252 :     arg2n = buf + bufsize1;
    1936                 : 
    1937                 :     /* nul-terminate arguments */
    1938             252 :     memcpy(arg1n, arg1, len1);
    1939             252 :     arg1n[len1] = '\0';
    1940             252 :     memcpy(arg2n, arg2, len2);
    1941             252 :     arg2n[len2] = '\0';
    1942                 : 
    1943             252 :     result = pg_strcoll_libc(arg1n, arg2n, locale);
    1944                 : 
    1945             252 :     if (buf != sbuf)
    1946 UNC           0 :         pfree(buf);
    1947                 : 
    1948 GNC         252 :     return result;
    1949                 : }
    1950                 : 
    1951                 : #ifdef USE_ICU
    1952                 : 
    1953                 : /*
    1954                 :  * pg_strncoll_icu_no_utf8
    1955                 :  *
    1956                 :  * Convert the arguments from the database encoding to UChar strings, then
    1957                 :  * call ucol_strcoll(). An argument length of -1 means that the string is
    1958                 :  * NUL-terminated.
    1959                 :  *
    1960                 :  * When the database encoding is UTF-8, and ICU supports ucol_strcollUTF8(),
    1961                 :  * caller should call that instead.
                         :  *
                         :  * The conversion uses icu_converter after init_icu_converter().  The
                         :  * lengths reported by uchar_length() size the two UChar buffers, each with
                         :  * one extra UChar; both live in a single allocation (the on-stack sbuf when
                         :  * it fits in TEXTBUFLEN, otherwise palloc'd and pfree'd before returning).
                         :  * uchar_convert() then fills the buffers, and the lengths it returns are
                         :  * the ones passed to ucol_strcoll() together with locale->info.icu.ucol.
                         :  *
                         :  * Returns the ucol_strcoll() result.
    1962                 :  */
    1963                 : static int
    1964 UNC           0 : pg_strncoll_icu_no_utf8(const char *arg1, int32_t len1,
    1965                 :                         const char *arg2, int32_t len2, pg_locale_t locale)
    1966                 : {
    1967                 :     char     sbuf[TEXTBUFLEN];
    1968               0 :     char    *buf = sbuf;
    1969                 :     int32_t  ulen1;
    1970                 :     int32_t  ulen2;
    1971                 :     size_t   bufsize1;
    1972                 :     size_t   bufsize2;
    1973                 :     UChar   *uchar1,
    1974                 :             *uchar2;
    1975                 :     int      result;
    1976                 : 
    1977               0 :     Assert(locale->provider == COLLPROVIDER_ICU);
    1978                 : #ifdef HAVE_UCOL_STRCOLLUTF8
    1979               0 :     Assert(GetDatabaseEncoding() != PG_UTF8);
    1980                 : #endif
    1981                 : 
    1982               0 :     init_icu_converter();
    1983                 : 
    1984               0 :     ulen1 = uchar_length(icu_converter, arg1, len1);
    1985               0 :     ulen2 = uchar_length(icu_converter, arg2, len2);
    1986                 : 
    1987               0 :     bufsize1 = (ulen1 + 1) * sizeof(UChar);
    1988               0 :     bufsize2 = (ulen2 + 1) * sizeof(UChar);
    1989                 : 
    1990               0 :     if (bufsize1 + bufsize2 > TEXTBUFLEN)
    1991               0 :         buf = palloc(bufsize1 + bufsize2);
    1992                 : 
    1993               0 :     uchar1 = (UChar *) buf;
    1994               0 :     uchar2 = (UChar *) (buf + bufsize1);
    1995                 : 
    1996               0 :     ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
    1997               0 :     ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
    1998                 : 
    1999               0 :     result = ucol_strcoll(locale->info.icu.ucol,
    2000                 :                           uchar1, ulen1,
    2001                 :                           uchar2, ulen2);
    2002                 : 
    2003               0 :     if (buf != sbuf)
    2004               0 :         pfree(buf);
    2005                 : 
    2006               0 :     return result;
    2007                 : }
    2008                 : 
    2009                 : /*
    2010                 :  * pg_strncoll_icu
    2011                 :  *
    2012                 :  * Call ucol_strcollUTF8() or ucol_strcoll() as appropriate for the given
    2013                 :  * database encoding. An argument length of -1 means the string is
    2014                 :  * NUL-terminated.
    2015                 :  *
    2016                 :  * Arguments must be encoded in the database encoding.
                         :  *
                         :  * When HAVE_UCOL_STRCOLLUTF8 is defined and the database encoding is UTF8,
                         :  * ucol_strcollUTF8() compares the arguments directly with
                         :  * locale->info.icu.ucol, and a failing UErrorCode is raised as an ERROR.
                         :  * In every other case the comparison is delegated to
                         :  * pg_strncoll_icu_no_utf8().
                         :  *
                         :  * locale must be non-NULL with provider COLLPROVIDER_ICU (asserted).
                         :  * Returns the comparison result of the function that was used.
    2017                 :  */
    2018                 : static int
    2019 GNC    12027680 : pg_strncoll_icu(const char *arg1, int32_t len1, const char *arg2, int32_t len2,
    2020                 :                 pg_locale_t locale)
    2021                 : {
    2022                 :     int result;
    2023                 : 
    2024        12027680 :     Assert(locale->provider == COLLPROVIDER_ICU);
    2025                 : 
    2026                 : #ifdef HAVE_UCOL_STRCOLLUTF8
    2027        12027680 :     if (GetDatabaseEncoding() == PG_UTF8)
    2028                 :     {
    2029                 :         UErrorCode  status;
    2030                 : 
    2031        12027680 :         status = U_ZERO_ERROR;
    2032        12027680 :         result = ucol_strcollUTF8(locale->info.icu.ucol,
    2033                 :                                   arg1, len1,
    2034                 :                                   arg2, len2,
    2035                 :                                   &status);
    2036        12027680 :         if (U_FAILURE(status))
    2037 UNC           0 :             ereport(ERROR,
    2038                 :                     (errmsg("collation failed: %s", u_errorName(status))));
    2039                 :     }
    2040                 :     else
    2041                 : #endif
    2042                 :     {
    2043               0 :         result = pg_strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
    2044                 :     }
    2045                 : 
    2046 GNC    12027680 :     return result;
    2047                 : }
    2048                 : 
    2049                 : #endif                          /* USE_ICU */
    2050                 : 
    2051                 : /*
    2052                 :  * pg_strcoll
    2053                 :  *
    2054                 :  * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
    2055                 :  * or wcscoll_l() as appropriate for the given locale, platform, and database
    2056                 :  * encoding. If the locale is not specified, use the database collation.
    2057                 :  *
    2058                 :  * Arguments must be encoded in the database encoding and nul-terminated.
    2059                 :  *
    2060                 :  * The caller is responsible for breaking ties if the collation is
    2061                 :  * deterministic; this maintains consistency with pg_strxfrm(), which cannot
    2062                 :  * easily account for deterministic collations.
                         :  *
                         :  * Dispatch: a NULL locale or provider COLLPROVIDER_LIBC goes to
                         :  * pg_strcoll_libc(); provider COLLPROVIDER_ICU goes to pg_strncoll_icu()
                         :  * with both lengths given as -1, in USE_ICU builds only.  Any other
                         :  * provider, including ICU in a build without USE_ICU, raises an ERROR.
                         :  *
                         :  * Returns the comparison result of the selected routine.
    2063                 :  */
    2064                 : int
    2065        10692953 : pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
    2066                 : {
    2067                 :     int         result;
    2068                 : 
    2069        10692953 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2070             723 :         result = pg_strcoll_libc(arg1, arg2, locale);
    2071                 : #ifdef USE_ICU
    2072        10692230 :     else if (locale->provider == COLLPROVIDER_ICU)
    2073        10692230 :         result = pg_strncoll_icu(arg1, -1, arg2, -1, locale);
    2074                 : #endif
    2075                 :     else
    2076                 :         /* shouldn't happen */
    2077 UNC           0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2078                 : 
    2079 GNC    10692953 :     return result;
    2080                 : }
    2081                 : 
    2082                 : /*
    2083                 :  * pg_strncoll
    2084                 :  *
    2085                 :  * Call ucol_strcollUTF8(), ucol_strcoll(), strcoll(), strcoll_l(), wcscoll(),
    2086                 :  * or wcscoll_l() as appropriate for the given locale, platform, and database
    2087                 :  * encoding. If the locale is not specified, use the database collation.
    2088                 :  *
    2089                 :  * Arguments must be encoded in the database encoding.
    2090                 :  *
    2091                 :  * This function may need to nul-terminate the arguments for libc functions;
    2092                 :  * so if the caller already has nul-terminated strings, it should call
    2093                 :  * pg_strcoll() instead.
    2094                 :  *
    2095                 :  * The caller is responsible for breaking ties if the collation is
    2096                 :  * deterministic; this maintains consistency with pg_strnxfrm(), which cannot
    2097                 :  * easily account for deterministic collations.
                         :  *
                         :  * Dispatch: a NULL locale or provider COLLPROVIDER_LIBC goes to
                         :  * pg_strncoll_libc(); provider COLLPROVIDER_ICU goes to pg_strncoll_icu()
                         :  * in USE_ICU builds only.  Any other provider, including ICU in a build
                         :  * without USE_ICU, raises an ERROR.
                         :  *
                         :  * NOTE(review): len1/len2 are size_t here but int32_t in pg_strncoll_icu(),
                         :  * so the ICU path narrows them implicitly; presumably inputs always fit in
                         :  * int32_t -- confirm.
    2098                 :  */
    2099                 : int
    2100         1335702 : pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
    2101                 :             pg_locale_t locale)
    2102                 : {
    2103                 :     int      result;
    2104                 : 
    2105         1335702 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2106             252 :         result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
    2107                 : #ifdef USE_ICU
    2108         1335450 :     else if (locale->provider == COLLPROVIDER_ICU)
    2109         1335450 :         result = pg_strncoll_icu(arg1, len1, arg2, len2, locale);
    2110                 : #endif
    2111                 :     else
    2112                 :         /* shouldn't happen */
    2113 UNC           0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2114                 : 
    2115 GNC     1335702 :     return result;
    2116                 : }
    2117                 : 
    2118                 : /*
                         :  * pg_strxfrm_libc
                         :  *
                         :  * Transform the nul-terminated string src with strxfrm_l() (non-NULL locale
                         :  * in HAVE_LOCALE_T builds) or strxfrm(), passing dest and destsize through
                         :  * unchanged and returning that function's result.
                         :  *
                         :  * This only works in builds that define TRUST_STRXFRM; in all other builds
                         :  * an ERROR is raised.  The error path reports COLLPROVIDER_LIBC instead of
                         :  * reading locale->provider, because locale is allowed to be NULL here and
                         :  * the Assert below already ties a non-NULL locale to that provider.
                         :  */
    2119                 : static size_t
    2120 UNC           0 : pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
    2121                 :                 pg_locale_t locale)
    2122                 : {
    2123               0 :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    2124                 : 
    2125                 : #ifdef TRUST_STRXFRM
    2126                 : #ifdef HAVE_LOCALE_T
    2127                 :     if (locale)
    2128                 :         return strxfrm_l(dest, src, destsize, locale->info.lt);
    2129                 :     else
    2130                 : #endif
    2131                 :         return strxfrm(dest, src, destsize);
    2132                 : #else
    2133                 :     /* shouldn't happen */
    2134               0 :     elog(ERROR, "unsupported collprovider: %c", COLLPROVIDER_LIBC);
    2135                 :     return 0; /* keep compiler quiet */
    2136                 : #endif
    2137                 : }
    2138                 : /*
                         :  * pg_strnxfrm_libc
                         :  *
                         :  * Length-bounded front end for pg_strxfrm_libc(): copies the 'srclen'
                         :  * bytes at 'src' into a nul-terminated scratch buffer, passes that copy
                         :  * to pg_strxfrm_libc() together with 'dest' and 'destsize', and returns
                         :  * that call's result.
                         :  */
    2139                 : static size_t
    2140               0 : pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
    2141                 :                  pg_locale_t locale)
    2142                 : {
    2143                 :     char     sbuf[TEXTBUFLEN];
    2144               0 :     char    *buf     = sbuf;
    2145               0 :     size_t   bufsize = srclen + 1;
    2146                 :     size_t   result;
    2147                 : 
    2148               0 :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    2149                 :     /* the stack buffer is used unless the copy needs more than TEXTBUFLEN */
    2150               0 :     if (bufsize > TEXTBUFLEN)
    2151               0 :         buf = palloc(bufsize);
    2152                 : 
    2153                 :     /* copy the source bytes and nul-terminate the copy */
    2154               0 :     memcpy(buf, src, srclen);
    2155               0 :     buf[srclen] = '\0';
    2156                 : 
    2157               0 :     result = pg_strxfrm_libc(dest, buf, destsize, locale);
    2158                 :     /* release the scratch buffer only if it was palloc'd */
    2159               0 :     if (buf != sbuf)
    2160               0 :         pfree(buf);
    2161                 : 
    2162                 :     /* if dest is defined, it should be nul-terminated */
    2163               0 :     Assert(result >= destsize || dest[result] == '\0');
    2164                 : 
    2165               0 :     return result;
    2166                 : }
    2167                 : 
    2168                 : #ifdef USE_ICU
    2169                 : 
    2170                 : /*
                         :  * pg_strnxfrm_icu
                         :  *
                         :  * ICU implementation behind pg_strxfrm() and pg_strnxfrm(): converts
                         :  * 'src' to UChars with the ICU converter and asks ucol_getSortKey() to
                         :  * write the sort key into 'dest' (capacity 'destsize').  Returns the
                         :  * length reported by ucol_getSortKey() minus one, i.e. not counting the
                         :  * terminating nul.
                         :  *
                         :  * 'srclen' of -1 means 'src' is NUL-terminated.
                         :  */
    2171                 : static size_t
    2172 GNC         498 : pg_strnxfrm_icu(char *dest, const char *src, int32_t srclen, int32_t destsize,
    2173                 :                 pg_locale_t locale)
    2174                 : {
    2175                 :     char     sbuf[TEXTBUFLEN];
    2176             498 :     char    *buf    = sbuf;
    2177                 :     UChar   *uchar;
    2178                 :     int32_t  ulen;
    2179                 :     size_t   uchar_bsize;
    2180                 :     Size     result_bsize;
    2181                 : 
    2182             498 :     Assert(locale->provider == COLLPROVIDER_ICU);
    2183                 : 
    2184             498 :     init_icu_converter();
    2185                 :     /* first pass: how many UChars does src convert to? */
    2186             498 :     ulen = uchar_length(icu_converter, src, srclen);
    2187                 :     /* bytes needed for those UChars plus one extra UChar */
    2188             498 :     uchar_bsize = (ulen + 1) * sizeof(UChar);
    2189                 :     /* fall back to palloc when that does not fit in the stack buffer */
    2190             498 :     if (uchar_bsize > TEXTBUFLEN)
    2191 UNC           0 :         buf = palloc(uchar_bsize);
    2192                 : 
    2193 GNC         498 :     uchar = (UChar *) buf;
    2194                 :     /* second pass: convert into the chosen buffer */
    2195             498 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
    2196                 : 
    2197             498 :     result_bsize = ucol_getSortKey(locale->info.icu.ucol,
    2198                 :                                    uchar, ulen,
    2199                 :                                    (uint8_t *) dest, destsize);
    2200                 : 
    2201                 :     /*
    2202                 :      * ucol_getSortKey() counts the nul-terminator in the result length, but
    2203                 :      * this function should not.
    2204                 :      */
    2205             498 :     Assert(result_bsize > 0);
    2206             498 :     result_bsize--;
    2207                 :     /* release the conversion buffer only if it was palloc'd */
    2208             498 :     if (buf != sbuf)
    2209 UNC           0 :         pfree(buf);
    2210                 : 
    2211                 :     /* if dest is defined, it should be nul-terminated */
    2212 GNC         498 :     Assert(result_bsize >= destsize || dest[result_bsize] == '\0');
    2213                 : 
    2214             498 :     return result_bsize;
    2215                 : }
    2216                 : 
    2217                 : /*
                         :  * pg_strnxfrm_prefix_icu_no_utf8
                         :  *
                         :  * Prefix sort key generation for ICU collations when the database
                         :  * encoding is not UTF-8: 'src' is first converted to UChars, then
                         :  * ucol_nextSortKeyPart() is asked to write up to 'destsize' bytes of
                         :  * sort key into 'dest'.  Returns the value reported by
                         :  * ucol_nextSortKeyPart(); raises an error if ICU reports a failure.
                         :  *
                         :  * 'srclen' of -1 means 'src' is NUL-terminated.
                         :  */
    2218                 : static size_t
    2219 UNC           0 : pg_strnxfrm_prefix_icu_no_utf8(char *dest, const char *src, int32_t srclen,
    2220                 :                                int32_t destsize, pg_locale_t locale)
    2221                 : {
    2222                 :     char             sbuf[TEXTBUFLEN];
    2223               0 :     char            *buf   = sbuf;
    2224                 :     UCharIterator    iter;
    2225                 :     uint32_t         state[2];
    2226                 :     UErrorCode       status;
    2227               0 :     int32_t          ulen  = -1;
    2228               0 :     UChar           *uchar = NULL;
    2229                 :     size_t           uchar_bsize;
    2230                 :     Size             result_bsize;
    2231                 : 
    2232               0 :     Assert(locale->provider == COLLPROVIDER_ICU);
    2233               0 :     Assert(GetDatabaseEncoding() != PG_UTF8);
    2234                 : 
    2235               0 :     init_icu_converter();
    2236                 : 
    2237               0 :     ulen = uchar_length(icu_converter, src, srclen);
    2238                 : 
    2239               0 :     uchar_bsize = (ulen + 1) * sizeof(UChar);
    2240                 : 
    2241               0 :     if (uchar_bsize > TEXTBUFLEN)
    2242               0 :         buf = palloc(uchar_bsize);
    2243                 : 
    2244               0 :     uchar = (UChar *) buf;
    2245                 : 
    2246               0 :     ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
    2247                 : 
    2248               0 :     uiter_setString(&iter, uchar, ulen);
    2249               0 :     state[0] = state[1] = 0;    /* won't need that again */
    2250               0 :     status = U_ZERO_ERROR;
    2251               0 :     result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
    2252                 :                                         &iter,
    2253                 :                                         state,
    2254                 :                                         (uint8_t *) dest,
    2255                 :                                         destsize,
    2256                 :                                         &status);
    2257               0 :     if (U_FAILURE(status))
    2258               0 :         ereport(ERROR,
    2259                 :                 (errmsg("sort key generation failed: %s",
    2260                 :                         u_errorName(status))));
    2261                 : 
                         :     /*
                         :      * The iterator is done with the UChar buffer; release it if it was
                         :      * palloc'd, as pg_strnxfrm_icu() does.
                         :      */
                         :     if (buf != sbuf)
                         :         pfree(buf);
                         : 
    2262               0 :     return result_bsize;
    2263                 : }
    2264                 : 
    2265                 : /*
                         :  * pg_strnxfrm_prefix_icu
                         :  *
                         :  * ICU implementation behind pg_strxfrm_prefix() and pg_strnxfrm_prefix().
                         :  * With a UTF-8 database encoding, ucol_nextSortKeyPart() reads 'src'
                         :  * through a UTF-8 iterator; any other encoding is delegated to
                         :  * pg_strnxfrm_prefix_icu_no_utf8().  Returns the value reported by
                         :  * whichever path ran; raises an error if ICU reports a failure.
                         :  *
                         :  * 'srclen' of -1 means 'src' is NUL-terminated.
                         :  */
    2266                 : static size_t
    2267 GNC      166790 : pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
    2268                 :                        int32_t destsize, pg_locale_t locale)
    2269                 : {
    2270                 :     size_t result;
    2271                 : 
    2272          166790 :     Assert(locale->provider == COLLPROVIDER_ICU);
    2273                 :     /* UTF-8 input is iterated in place, with no conversion to UChars */
    2274          166790 :     if (GetDatabaseEncoding() == PG_UTF8)
    2275                 :     {
    2276                 :         UCharIterator iter;
    2277                 :         uint32_t    state[2];
    2278                 :         UErrorCode  status;
    2279                 : 
    2280          166790 :         uiter_setUTF8(&iter, src, srclen);
    2281          166790 :         state[0] = state[1] = 0;    /* won't need that again */
    2282          166790 :         status = U_ZERO_ERROR;
    2283          166790 :         result = ucol_nextSortKeyPart(locale->info.icu.ucol,
    2284                 :                                       &iter,
    2285                 :                                       state,
    2286                 :                                       (uint8_t *) dest,
    2287                 :                                       destsize,
    2288                 :                                       &status);
    2289          166790 :         if (U_FAILURE(status))
    2290 UNC           0 :             ereport(ERROR,
    2291                 :                     (errmsg("sort key generation failed: %s",
    2292                 :                             u_errorName(status))));
    2293                 :     }
    2294                 :     else
    2295               0 :         result = pg_strnxfrm_prefix_icu_no_utf8(dest, src, srclen, destsize,
    2296                 :                                                 locale);
    2297                 : 
    2298 GNC      166790 :     return result;
    2299                 : }
    2300                 : 
    2301                 : #endif
    2302                 : 
    2303                 : /*
    2304                 :  * Return true if the collation provider supports pg_strxfrm() and
    2305                 :  * pg_strnxfrm(); otherwise false.
    2306                 :  *
    2307                 :  * Unfortunately, it seems that strxfrm() for non-C collations is broken on
    2308                 :  * many common platforms; testing of multiple versions of glibc reveals that,
    2309                 :  * for many locales, strcoll() and strxfrm() do not return consistent
    2310                 :  * results. While no libc other than glibc and Cygwin has so far been shown
    2311                 :  * to have a problem, we take the conservative course of action for right now
    2312                 :  * and disable this categorically.  (Users who are certain this isn't a
    2313                 :  * problem on their system can define TRUST_STRXFRM.)
    2314                 :  *
    2315                 :  * No similar problem is known for the ICU provider.
                         :  *
                         :  * A NULL locale is treated the same as the libc provider.  Any provider
                         :  * other than libc or ICU raises an error.
    2316                 :  */
    2317                 : bool
    2318           26140 : pg_strxfrm_enabled(pg_locale_t locale)
    2319                 : {
    2320           26140 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2321                 : #ifdef TRUST_STRXFRM
    2322                 :         return true;
    2323                 : #else
    2324             105 :         return false;
    2325                 : #endif
    2326           26035 :     else if (locale->provider == COLLPROVIDER_ICU)
    2327           26035 :         return true;
    2328                 :     else
    2329                 :         /* shouldn't happen */
    2330 UNC           0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2331                 : 
    2332                 :     return false; /* keep compiler quiet */
    2333                 : }
    2334                 : 
    2335                 : /*
    2336                 :  * pg_strxfrm
    2337                 :  *
    2338                 :  * Transforms 'src' to a nul-terminated string stored in 'dest' such that
    2339                 :  * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
    2340                 :  * untransformed strings.
    2341                 :  *
    2342                 :  * The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
    2343                 :  * may be NULL.
    2344                 :  *
    2345                 :  * Returns the number of bytes needed to store the transformed string,
    2346                 :  * excluding the terminating nul byte. If the value returned is 'destsize' or
    2347                 :  * greater, the resulting contents of 'dest' are undefined.
    2348                 :  */
    2349                 : size_t
    2350               0 : pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
    2351                 : {
    2352               0 :     size_t result = 0; /* keep compiler quiet */
    2353                 :     /* a NULL locale is routed to the libc implementation */
    2354               0 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2355               0 :         result = pg_strxfrm_libc(dest, src, destsize, locale);
    2356                 : #ifdef USE_ICU
    2357               0 :     else if (locale->provider == COLLPROVIDER_ICU)
    2358               0 :         result = pg_strnxfrm_icu(dest, src, -1, destsize, locale); /* -1: src is nul-terminated */
    2359                 : #endif
    2360                 :     else
    2361                 :         /* shouldn't happen; ICU also lands here in builds without USE_ICU */
    2362               0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2363                 : 
    2364               0 :     return result;
    2365                 : }
    2366                 : 
    2367                 : /*
    2368                 :  * pg_strnxfrm
    2369                 :  *
    2370                 :  * Transforms 'src' to a nul-terminated string stored in 'dest' such that
    2371                 :  * ordinary strcmp() on transformed strings is equivalent to pg_strcoll() on
    2372                 :  * untransformed strings.
    2373                 :  *
    2374                 :  * 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
    2375                 :  * be NULL.
    2376                 :  *
    2377                 :  * Returns the number of bytes needed to store the transformed string,
    2378                 :  * excluding the terminating nul byte. If the value returned is 'destsize' or
    2379                 :  * greater, the resulting contents of 'dest' are undefined.
    2380                 :  *
    2381                 :  * This function may need to nul-terminate the argument for libc functions;
    2382                 :  * so if the caller already has a nul-terminated string, it should call
    2383                 :  * pg_strxfrm() instead.
    2384                 :  */
    2385                 : size_t
    2386 GNC         498 : pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
    2387                 :             pg_locale_t locale)
    2388                 : {
    2389             498 :     size_t result = 0; /* keep compiler quiet */
    2390                 :     /*
                         :      * A NULL locale is routed to the libc implementation.
                         :      *
                         :      * NOTE(review): 'srclen' and 'destsize' are size_t here but int32_t in
                         :      * pg_strnxfrm_icu(); presumably callers never pass values that large,
                         :      * but that is not checked here -- confirm.
                         :      */
    2391             498 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2392 UNC           0 :         result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
    2393                 : #ifdef USE_ICU
    2394 GNC         498 :     else if (locale->provider == COLLPROVIDER_ICU)
    2395             498 :         result = pg_strnxfrm_icu(dest, src, srclen, destsize, locale);
    2396                 : #endif
    2397                 :     else
    2398                 :         /* shouldn't happen; ICU also lands here in builds without USE_ICU */
    2399 UNC           0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2400                 : 
    2401 GNC         498 :     return result;
    2402                 : }
    2403                 : 
    2404                 : /*
    2405                 :  * Return true if the collation provider supports pg_strxfrm_prefix() and
    2406                 :  * pg_strnxfrm_prefix(); otherwise false.
                         :  *
                         :  * Only the ICU provider does: a NULL locale or the libc provider yields
                         :  * false, and any other provider raises an error.
    2407                 :  */
    2408                 : bool
    2409          166790 : pg_strxfrm_prefix_enabled(pg_locale_t locale)
    2410                 : {
    2411          166790 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2412 UNC           0 :         return false;
    2413 GNC      166790 :     else if (locale->provider == COLLPROVIDER_ICU)
    2414          166790 :         return true;
    2415                 :     else
    2416                 :         /* shouldn't happen */
    2417 UNC           0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2418                 : 
    2419                 :     return false; /* keep compiler quiet */
    2420                 : }
    2421                 : 
    2422                 : /*
    2423                 :  * pg_strxfrm_prefix
    2424                 :  *
    2425                 :  * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
    2426                 :  * memcmp() on the byte sequence is equivalent to pg_strcoll() on
    2427                 :  * untransformed strings. The result is not nul-terminated.
    2428                 :  *
    2429                 :  * The provided 'src' must be nul-terminated.
    2430                 :  *
    2431                 :  * If destsize is not large enough to hold the resulting byte sequence, stores
    2432                 :  * only the first destsize bytes in 'dest'. Returns the number of bytes
    2433                 :  * actually copied to 'dest'.
                         :  *
                         :  * Only the ICU provider is supported.  A NULL locale or the libc provider
                         :  * raises an error, as does any other provider.
    2434                 :  */
    2435                 : size_t
    2436 GNC      166790 : pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
    2437                 :                   pg_locale_t locale)
    2438                 : {
    2439          166790 :     size_t result = 0; /* keep compiler quiet */
    2440                 : 
    2441          166790 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2442 UNC           0 :         elog(ERROR, "collprovider '%c' does not support pg_strxfrm_prefix()",
    2443                 :              locale ? locale->provider : COLLPROVIDER_LIBC); /* locale may be NULL here */
    2444                 : #ifdef USE_ICU
    2445 GNC      166790 :     else if (locale->provider == COLLPROVIDER_ICU)
    2446          166790 :         result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
    2447                 : #endif
    2448                 :     else
    2449                 :         /* shouldn't happen */
    2450 UNC           0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2451                 : 
    2452 GNC      166790 :     return result;
    2453                 : }
    2454                 : 
    2455                 : /*
    2456                 :  * pg_strnxfrm_prefix
    2457                 :  *
    2458                 :  * Transforms 'src' to a byte sequence stored in 'dest' such that ordinary
    2459                 :  * memcmp() on the byte sequence is equivalent to pg_strcoll() on
    2460                 :  * untransformed strings. The result is not nul-terminated.
    2461                 :  *
    2462                 :  * 'src' does not need to be nul-terminated; 'srclen' gives its length.
    2463                 :  *
    2464                 :  * If destsize is not large enough to hold the resulting byte sequence, stores
    2465                 :  * only the first destsize bytes in 'dest'. Returns the number of bytes
    2466                 :  * actually copied to 'dest'.
    2467                 :  *
    2468                 :  * This function may need to nul-terminate the argument for libc functions;
    2469                 :  * so if the caller already has a nul-terminated string, it should call
    2470                 :  * pg_strxfrm_prefix() instead.
                         :  *
                         :  * Only the ICU provider is supported.  A NULL locale or the libc provider
                         :  * raises an error, as does any other provider.
    2471                 :  */
    2472                 : size_t
    2473 UNC           0 : pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
    2474                 :                    size_t srclen, pg_locale_t locale)
    2475                 : {
    2476               0 :     size_t result = 0; /* keep compiler quiet */
    2477                 : 
    2478               0 :     if (!locale || locale->provider == COLLPROVIDER_LIBC)
    2479               0 :         elog(ERROR, "collprovider '%c' does not support pg_strnxfrm_prefix()",
    2480                 :              locale ? locale->provider : COLLPROVIDER_LIBC); /* locale may be NULL here */
    2481                 : #ifdef USE_ICU
    2482               0 :     else if (locale->provider == COLLPROVIDER_ICU)
    2483               0 :         result = pg_strnxfrm_prefix_icu(dest, src, srclen, destsize, locale); /* honor srclen; src may lack a nul */
    2484                 : #endif
    2485                 :     else
    2486                 :         /* shouldn't happen */
    2487               0 :         elog(ERROR, "unsupported collprovider: %c", locale->provider);
    2488                 : 
    2489               0 :     return result;
    2490                 : }
    2491                 : 
    2492                 : #ifdef USE_ICU
    2493                 : 
    2494                 : /*
    2495                 :  * Wrapper around ucol_open() to handle API differences for older ICU
    2496                 :  * versions.
                         :  *
                         :  * Returns the collator opened for 'loc_str'.  Raises an error if
                         :  * 'loc_str' is NULL, if its language cannot be extracted (ICU < 55
                         :  * only), or if opening the collator or applying its attributes (ICU < 54
                         :  * only) fails.  Error messages quote the caller's original string, not
                         :  * the rewritten "root" form.
    2497                 :  */
    2498                 : static UCollator *
    2499 GNC      266329 : pg_ucol_open(const char *loc_str)
    2500                 : {
    2501                 :     UCollator  *collator;
    2502                 :     UErrorCode  status;
    2503          266329 :     const char *orig_str = loc_str;
    2504          266329 :     char       *fixed_str = NULL;
    2505                 : 
    2506                 :     /*
    2507                 :      * Must never open default collator, because it depends on the environment
    2508                 :      * and may change at any time. Should not happen, but check here to catch
    2509                 :      * bugs that might be hard to catch otherwise.
    2510                 :      *
    2511                 :      * NB: the default collator is not the same as the collator for the root
    2512                 :      * locale. The root locale may be specified as the empty string, "und", or
    2513                 :      * "root". The default collator is opened by passing NULL to ucol_open().
    2514                 :      */
    2515          266329 :     if (loc_str == NULL)
    2516 UNC           0 :         elog(ERROR, "opening default collator is not supported");
    2517                 : 
    2518                 :     /*
    2519                 :      * In ICU versions 54 and earlier, "und" is not a recognized spelling of
    2520                 :      * the root locale. If the first component of the locale is "und", replace
    2521                 :      * with "root" before opening.
                         :      *
                         :      * NOTE(review): the version test is an ordinary if on what is presumably
                         :      * a compile-time constant from the ICU headers, rather than an #if --
                         :      * confirm.
    2522                 :      */
    2523                 :     if (U_ICU_VERSION_MAJOR_NUM < 55)
    2524                 :     {
    2525                 :         char        lang[ULOC_LANG_CAPACITY];
    2526                 : 
    2527                 :         status = U_ZERO_ERROR;
    2528                 :         uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
    2529                 :         if (U_FAILURE(status))
    2530                 :         {
    2531                 :             ereport(ERROR,
    2532                 :                     (errmsg("could not get language from locale \"%s\": %s",
    2533                 :                             loc_str, u_errorName(status))));
    2534                 :         }
    2535                 : 
    2536                 :         if (strcmp(lang, "und") == 0)
    2537                 :         {
    2538                 :             const char *remainder = loc_str + strlen("und");
    2539                 :             /* build "root" followed by whatever came after the "und" prefix */
    2540                 :             fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
    2541                 :             strcpy(fixed_str, "root");
    2542                 :             strcat(fixed_str, remainder);
    2543                 : 
    2544                 :             loc_str = fixed_str;
    2545                 :         }
    2546                 :     }
    2547                 : 
    2548 GNC      266329 :     status = U_ZERO_ERROR;
    2549          266329 :     collator = ucol_open(loc_str, &status);
    2550          266329 :     if (U_FAILURE(status))
    2551               6 :         ereport(ERROR,
    2552                 :                 /* use original string for error report */
    2553                 :                 (errmsg("could not open collator for locale \"%s\": %s",
    2554                 :                         orig_str, u_errorName(status))));
    2555                 :     /* older ICU: apply the attributes from the locale string by hand */
    2556                 :     if (U_ICU_VERSION_MAJOR_NUM < 54)
    2557                 :     {
    2558                 :         status = U_ZERO_ERROR;
    2559                 :         icu_set_collation_attributes(collator, loc_str, &status);
    2560                 : 
    2561                 :         /*
    2562                 :          * Pretend the error came from ucol_open(), for consistent error
    2563                 :          * message across ICU versions.
    2564                 :          */
    2565                 :         if (U_FAILURE(status))
    2566                 :         {
    2567                 :             ucol_close(collator);
    2568                 :             ereport(ERROR,
    2569                 :                     (errmsg("could not open collator for locale \"%s\": %s",
    2570                 :                             orig_str, u_errorName(status))));
    2571                 :         }
    2572                 :     }
    2573                 :     /* the rewritten locale string, if one was built, is no longer needed */
    2574          266323 :     if (fixed_str != NULL)
    2575 UNC           0 :         pfree(fixed_str);
    2576                 : 
    2577 GNC      266323 :     return collator;
    2578                 : }
    2579                 : /*
                         :  * init_icu_converter
                         :  *
                         :  * Opens the ICU converter for the database encoding on first use and
                         :  * stores it in icu_converter; once that is set, later calls return
                         :  * immediately.  Raises an error if the database encoding has no ICU
                         :  * encoding name or if ucnv_open() fails.
                         :  */
    2580                 : static void
    2581 GIC     1178278 : init_icu_converter(void)
    2582                 : {
    2583 ECB             :     const char *icu_encoding_name;
    2584                 :     UErrorCode  status;
    2585                 :     UConverter *conv;
    2586                 : 
    2587 GIC     1178278 :     if (icu_converter)
    2588         1178223 :         return;                 /* already done */
    2589                 :     /* first call: look up the ICU name for the database encoding */
    2590              55 :     icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
    2591              55 :     if (!icu_encoding_name)
    2592 UIC           0 :         ereport(ERROR,
    2593                 :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    2594                 :                  errmsg("encoding \"%s\" not supported by ICU",
    2595                 :                         pg_encoding_to_char(GetDatabaseEncoding()))));
    2596                 : 
    2597 GIC          55 :     status = U_ZERO_ERROR;
    2598              55 :     conv = ucnv_open(icu_encoding_name, &status);
    2599              55 :     if (U_FAILURE(status))
    2600 UIC           0 :         ereport(ERROR,
    2601                 :                 (errmsg("could not open ICU converter for encoding \"%s\": %s",
    2602                 :                         icu_encoding_name, u_errorName(status))));
    2603                 :     /* store the converter only after it was opened successfully */
    2604 GIC          55 :     icu_converter = conv;
    2605                 : }
    2606                 : 
    2607                 : /*
    2608                 :  * Find length, in UChars, of given string if converted to UChar string.
                         :  *
                         :  * Calls ucnv_toUChars() with a NULL destination of capacity 0, so
                         :  * U_BUFFER_OVERFLOW_ERROR is the expected outcome and is not treated as a
                         :  * failure; any other ICU failure raises an error.
                         :  *
                         :  * NOTE(review): the value is computed as int32_t but returned as size_t.
    2609                 :  */
    2610                 : static size_t
    2611 GNC      589391 : uchar_length(UConverter *converter, const char *str, int32_t len)
    2612                 : {
    2613          589391 :     UErrorCode  status = U_ZERO_ERROR;
    2614                 :     int32_t     ulen;
    2615          589391 :     ulen = ucnv_toUChars(converter, NULL, 0, str, len, &status);
    2616          589391 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
    2617 UNC           0 :         ereport(ERROR,
    2618                 :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    2619 GNC      589391 :     return ulen;
    2620                 : }
    2621                 : 
    2622                 : /*
    2623                 :  * Convert the given source string into a UChar string, stored in dest, and
    2624                 :  * return the length (in UChars).
                         :  *
                         :  * 'destlen' is the capacity of 'dest' passed to ucnv_toUChars().  Unlike
                         :  * uchar_length(), every ICU failure status raises an error here, so the
                         :  * caller must supply a large enough 'dest'.
    2625                 :  */
    2626                 : static int32_t
    2627          589391 : uchar_convert(UConverter *converter, UChar *dest, int32_t destlen,
    2628                 :               const char *src, int32_t srclen)
    2629                 : {
    2630          589391 :     UErrorCode  status = U_ZERO_ERROR;
    2631                 :     int32_t     ulen;
    2632          589391 :     status = U_ZERO_ERROR;    /* redundant: already set by the initializer */
    2633          589391 :     ulen = ucnv_toUChars(converter, dest, destlen, src, srclen, &status);
    2634          589391 :     if (U_FAILURE(status))
    2635 UNC           0 :         ereport(ERROR,
    2636                 :                 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
    2637 GNC      589391 :     return ulen;
    2638                 : }
    2639                 : 
    2640                 : /*
    2641                 :  * Convert a string in the database encoding into a string of UChars.
    2642                 :  *
    2643                 :  * The source string at buff is of length nbytes
    2644                 :  * (it needn't be nul-terminated)
    2645                 :  *
    2646                 :  * *buff_uchar receives a pointer to the palloc'd result string, and
    2647                 :  * the function's result is the number of UChars generated.
    2648                 :  *
    2649                 :  * The result string is nul-terminated, though most callers rely on the
    2650                 :  * result length instead.
    2651                 :  */
    2652                 : int32_t
    2653 GIC      588893 : icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
    2654                 : {
    2655                 :     int32_t len_uchar;
    2656                 : 
    2657          588893 :     init_icu_converter();
    2658                 :     /*
                         :      * First pass: measure the converted length.
                         :      *
                         :      * NOTE(review): 'nbytes' is size_t but uchar_length() and
                         :      * uchar_convert() take int32_t lengths -- confirm callers stay in range.
                         :      */
    2659 GNC      588893 :     len_uchar = uchar_length(icu_converter, buff, nbytes);
    2660                 :     /* allocate one UChar more than the measured length, then convert */
    2661 GIC      588893 :     *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
    2662 GNC      588893 :     len_uchar = uchar_convert(icu_converter,
    2663                 :                               *buff_uchar, len_uchar + 1, buff, nbytes);
    2664                 : 
    2665 GIC      588893 :     return len_uchar;
    2666                 : }
    2667                 : 
    2668                 : /*
    2669                 :  * Convert a string of UChars into the database encoding.
    2670                 :  *
    2671                 :  * The source string at buff_uchar is of length len_uchar
    2672                 :  * (it needn't be nul-terminated)
    2673                 :  *
    2674                 :  * *result receives a pointer to the palloc'd result string, and the
    2675                 :  * function's result is the number of bytes generated (not counting nul).
    2676                 :  *
    2677                 :  * The result string is nul-terminated.
    2678                 :  */
    2679                 : int32_t
    2680          588887 : icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
    2681                 : {
    2682                 :     UErrorCode  status;
    2683                 :     int32_t     len_result;
    2684                 : 
    2685          588887 :     init_icu_converter();
    2686                 :     /* first pass: NULL destination, only to learn the required length */
    2687          588887 :     status = U_ZERO_ERROR;
    2688          588887 :     len_result = ucnv_fromUChars(icu_converter, NULL, 0,
    2689                 :                                  buff_uchar, len_uchar, &status);
    2690          588887 :     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
    2691 UIC           0 :         ereport(ERROR,
    2692                 :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
    2693                 :                         u_errorName(status))));
    2694                 :     /* allocate one byte more than the measured length */
    2695 GIC      588887 :     *result = palloc(len_result + 1);
    2696 ECB             :     /* second pass: convert into the new buffer; any failure is an error */
    2697 GIC      588887 :     status = U_ZERO_ERROR;
    2698          588887 :     len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
    2699                 :                                  buff_uchar, len_uchar, &status);
    2700 CBC      588887 :     if (U_FAILURE(status))
    2701 UIC           0 :         ereport(ERROR,
    2702                 :                 (errmsg("%s failed: %s", "ucnv_fromUChars",
    2703                 :                         u_errorName(status))));
    2704                 : 
    2705 GIC      588887 :     return len_result;
    2706                 : }
    2707                 : 
    2708                 : /*
    2709                 :  * Parse collation attributes from the given locale string and apply them to
    2710                 :  * the open collator.
    2711                 :  *
    2712                 :  * First, the locale string is canonicalized to an ICU format locale ID such
    2713                 :  * as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
    2714                 :  * the key-value arguments.
    2715                 :  *
    2716 ECB             :  * Starting with ICU version 54, the attributes are processed automatically by
    2717                 :  * ucol_open(), so this is only necessary for emulating this behavior on older
    2718                 :  * versions.
    2719                 :  */
    2720                 : pg_attribute_unused()
    2721                 : static void
    2722 UNC           0 : icu_set_collation_attributes(UCollator *collator, const char *loc,
    2723                 :                              UErrorCode *status)
    2724 EUB             : {
    2725                 :     int32_t     len;
    2726                 :     char       *icu_locale_id;
    2727                 :     char       *lower_str;
    2728                 :     char       *str;
    2729 ECB             : 
    2730                 :     /*
    2731                 :      * The input locale may be a BCP 47 language tag, e.g.
    2732                 :      * "und-u-kc-ks-level1", which expresses the same attributes in a
    2733                 :      * different form. It will be converted to the equivalent ICU format
    2734                 :      * locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
    2735                 :      * uloc_canonicalize().
    2736                 :      */
    2737 UNC           0 :     *status = U_ZERO_ERROR;
    2738               0 :     len = uloc_canonicalize(loc, NULL, 0, status);
    2739               0 :     icu_locale_id = palloc(len + 1);
    2740               0 :     *status = U_ZERO_ERROR;
    2741               0 :     len = uloc_canonicalize(loc, icu_locale_id, len + 1, status);
    2742               0 :     if (U_FAILURE(*status))
    2743               0 :         return;
    2744                 : 
    2745               0 :     lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
    2746                 : 
    2747               0 :     pfree(icu_locale_id);
    2748                 : 
    2749               0 :     str = strchr(lower_str, '@');
    2750 UIC           0 :     if (!str)
    2751               0 :         return;
    2752               0 :     str++;
    2753                 : 
    2754               0 :     for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
    2755                 :     {
    2756               0 :         char       *e = strchr(token, '=');
    2757 ECB             : 
    2758 UIC           0 :         if (e)
    2759                 :         {
    2760                 :             char       *name;
    2761 ECB             :             char       *value;
    2762                 :             UColAttribute uattr;
    2763                 :             UColAttributeValue uvalue;
    2764                 : 
    2765 UNC           0 :             *status = U_ZERO_ERROR;
    2766                 : 
    2767 LBC           0 :             *e = '\0';
    2768 UIC           0 :             name = token;
    2769               0 :             value = e + 1;
    2770                 : 
    2771                 :             /*
    2772                 :              * See attribute name and value lists in ICU i18n/coll.cpp
    2773                 :              */
    2774               0 :             if (strcmp(name, "colstrength") == 0)
    2775 LBC           0 :                 uattr = UCOL_STRENGTH;
    2776 UBC           0 :             else if (strcmp(name, "colbackwards") == 0)
    2777 UIC           0 :                 uattr = UCOL_FRENCH_COLLATION;
    2778 LBC           0 :             else if (strcmp(name, "colcaselevel") == 0)
    2779               0 :                 uattr = UCOL_CASE_LEVEL;
    2780 UIC           0 :             else if (strcmp(name, "colcasefirst") == 0)
    2781               0 :                 uattr = UCOL_CASE_FIRST;
    2782 LBC           0 :             else if (strcmp(name, "colalternate") == 0)
    2783               0 :                 uattr = UCOL_ALTERNATE_HANDLING;
    2784               0 :             else if (strcmp(name, "colnormalization") == 0)
    2785               0 :                 uattr = UCOL_NORMALIZATION_MODE;
    2786 UIC           0 :             else if (strcmp(name, "colnumeric") == 0)
    2787 LBC           0 :                 uattr = UCOL_NUMERIC_COLLATION;
    2788                 :             else
    2789 ECB             :                 /* ignore if unknown */
    2790 UBC           0 :                 continue;
    2791                 : 
    2792 LBC           0 :             if (strcmp(value, "primary") == 0)
    2793 UIC           0 :                 uvalue = UCOL_PRIMARY;
    2794               0 :             else if (strcmp(value, "secondary") == 0)
    2795               0 :                 uvalue = UCOL_SECONDARY;
    2796               0 :             else if (strcmp(value, "tertiary") == 0)
    2797               0 :                 uvalue = UCOL_TERTIARY;
    2798               0 :             else if (strcmp(value, "quaternary") == 0)
    2799               0 :                 uvalue = UCOL_QUATERNARY;
    2800               0 :             else if (strcmp(value, "identical") == 0)
    2801               0 :                 uvalue = UCOL_IDENTICAL;
    2802               0 :             else if (strcmp(value, "no") == 0)
    2803               0 :                 uvalue = UCOL_OFF;
    2804               0 :             else if (strcmp(value, "yes") == 0)
    2805               0 :                 uvalue = UCOL_ON;
    2806               0 :             else if (strcmp(value, "shifted") == 0)
    2807               0 :                 uvalue = UCOL_SHIFTED;
    2808 UBC           0 :             else if (strcmp(value, "non-ignorable") == 0)
    2809 UIC           0 :                 uvalue = UCOL_NON_IGNORABLE;
    2810               0 :             else if (strcmp(value, "lower") == 0)
    2811               0 :                 uvalue = UCOL_LOWER_FIRST;
    2812 UBC           0 :             else if (strcmp(value, "upper") == 0)
    2813 UIC           0 :                 uvalue = UCOL_UPPER_FIRST;
    2814                 :             else
    2815                 :             {
    2816 UNC           0 :                 *status = U_ILLEGAL_ARGUMENT_ERROR;
    2817               0 :                 break;
    2818                 :             }
    2819                 : 
    2820               0 :             ucol_setAttribute(collator, uattr, uvalue, status);
    2821 EUB             :         }
    2822                 :     }
    2823                 : 
    2824 UNC           0 :     pfree(lower_str);
    2825                 : }
    2826 EUB             : 
    2827                 : #endif
    2828                 : 
    2829                 : /*
    2830                 :  * Return the BCP47 language tag representation of the requested locale.
    2831                 :  *
    2832                 :  * This function should be called before passing the string to ucol_open(),
    2833                 :  * because conversion to a language tag also performs "level 2
    2834                 :  * canonicalization". In addition to producing a consistent format, level 2
    2835                 :  * canonicalization is able to more accurately interpret different input
    2836                 :  * locale string formats, such as POSIX and .NET IDs.
    2837                 :  */
    2838                 : char *
    2839 GNC      244303 : icu_language_tag(const char *loc_str, int elevel)
    2840                 : {
    2841 EUB             : #ifdef USE_ICU
    2842                 :     UErrorCode   status;
    2843                 :     char         lang[ULOC_LANG_CAPACITY];
    2844                 :     char        *langtag;
    2845 GNC      244303 :     size_t       buflen = 32;   /* arbitrary starting buffer size */
    2846          244303 :     const bool   strict = true;
    2847 EUB             : 
    2848 GIC      244303 :     status = U_ZERO_ERROR;
    2849 GNC      244303 :     uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
    2850 GIC      244303 :     if (U_FAILURE(status))
    2851                 :     {
    2852 UNC           0 :         if (elevel > 0)
    2853               0 :             ereport(elevel,
    2854                 :                     (errmsg("could not get language from locale \"%s\": %s",
    2855                 :                             loc_str, u_errorName(status))));
    2856               0 :         return NULL;
    2857                 :     }
    2858 EUB             : 
    2859                 :     /* C/POSIX locales aren't handled by uloc_getLanguageTag() */
    2860 GNC      244303 :     if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
    2861               2 :         return pstrdup("en-US-u-va-posix");
    2862                 : 
    2863                 :     /*
    2864                 :      * A BCP47 language tag doesn't have a clearly-defined upper limit
    2865                 :      * (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
    2866                 :      * uloc_toLanguageTag() doesn't always return the ultimate length on the
    2867                 :      * first call, necessitating a loop.
    2868                 :      */
    2869          244301 :     langtag = palloc(buflen);
    2870                 :     while (true)
    2871 UNC           0 :     {
    2872                 :         int32_t     len;
    2873                 : 
    2874 GNC      244301 :         status = U_ZERO_ERROR;
    2875          244301 :         len = uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
    2876                 : 
    2877                 :         /*
    2878                 :          * If the result fits in the buffer exactly (len == buflen),
    2879                 :          * uloc_toLanguageTag() will return success without nul-terminating
    2880                 :          * the result. Check for either U_BUFFER_OVERFLOW_ERROR or len >=
    2881                 :          * buflen and try again.
    2882                 :          */
    2883          244301 :         if ((status == U_BUFFER_OVERFLOW_ERROR ||
    2884          244301 :              (U_SUCCESS(status) && len >= buflen)) &&
    2885                 :             buflen < MaxAllocSize)
    2886                 :         {
    2887 UNC           0 :             buflen = Min(buflen * 2, MaxAllocSize);
    2888               0 :             langtag = repalloc(langtag, buflen);
    2889               0 :             continue;
    2890                 :         }
    2891                 : 
    2892 GNC      244301 :         break;
    2893                 :     }
    2894                 : 
    2895          244301 :     if (U_FAILURE(status))
    2896                 :     {
    2897               6 :         pfree(langtag);
    2898                 : 
    2899               6 :         if (elevel > 0)
    2900               6 :             ereport(elevel,
    2901                 :                     (errmsg("could not convert locale name \"%s\" to language tag: %s",
    2902                 :                             loc_str, u_errorName(status))));
    2903               3 :         return NULL;
    2904                 :     }
    2905                 : 
    2906          244295 :     return langtag;
    2907                 : #else                           /* not USE_ICU */
    2908                 :     ereport(ERROR,
    2909                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    2910                 :              errmsg("ICU is not supported in this build")));
    2911                 :     return NULL;        /* keep compiler quiet */
    2912                 : #endif                          /* not USE_ICU */
    2913                 : }
    2914                 : 
    2915                 : /*
    2916                 :  * Perform best-effort check that the locale is a valid one.
    2917                 :  */
    2918                 : void
    2919             855 : icu_validate_locale(const char *loc_str)
    2920                 : {
    2921                 : #ifdef USE_ICU
    2922                 :     UCollator   *collator;
    2923                 :     UErrorCode   status;
    2924                 :     char         lang[ULOC_LANG_CAPACITY];
    2925             855 :     bool         found   = false;
    2926             855 :     int          elevel = icu_validation_level;
    2927                 : 
    2928                 :     /* no validation */
    2929             855 :     if (elevel < 0)
    2930 UNC           0 :         return;
    2931                 : 
    2932                 :     /* downgrade to WARNING during pg_upgrade */
    2933 GNC         855 :     if (IsBinaryUpgrade && elevel > WARNING)
    2934               7 :         elevel = WARNING;
    2935                 : 
    2936                 :     /* validate that we can extract the language */
    2937             855 :     status = U_ZERO_ERROR;
    2938             855 :     uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
    2939             855 :     if (U_FAILURE(status))
    2940                 :     {
    2941 UNC           0 :         ereport(elevel,
    2942                 :                 (errmsg("could not get language from ICU locale \"%s\": %s",
    2943                 :                         loc_str, u_errorName(status)),
    2944                 :                  errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
    2945               0 :         return;
    2946                 :     }
    2947                 : 
    2948                 :     /* check for special language name */
    2949 GNC         855 :     if (strcmp(lang, "") == 0 ||
    2950             804 :         strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
    2951             804 :         strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
    2952              51 :         found = true;
    2953                 : 
    2954                 :     /* search for matching language within ICU */
    2955          118707 :     for (int32_t i = 0; !found && i < uloc_countAvailable(); i++)
    2956                 :     {
    2957          117852 :         const char  *otherloc = uloc_getAvailable(i);
    2958                 :         char         otherlang[ULOC_LANG_CAPACITY];
    2959                 : 
    2960          117852 :         status = U_ZERO_ERROR;
    2961          117852 :         uloc_getLanguage(otherloc, otherlang, ULOC_LANG_CAPACITY, &status);
    2962          117852 :         if (U_FAILURE(status))
    2963 UNC           0 :             continue;
    2964                 : 
    2965 GNC      117852 :         if (strcmp(lang, otherlang) == 0)
    2966             798 :             found = true;
    2967                 :     }
    2968                 : 
    2969             855 :     if (!found)
    2970               6 :         ereport(elevel,
    2971                 :                 (errmsg("ICU locale \"%s\" has unknown language \"%s\"",
    2972                 :                         loc_str, lang),
    2973                 :                  errhint("To disable ICU locale validation, set parameter icu_validation_level to DISABLED.")));
    2974                 : 
    2975                 :     /* check that it can be opened */
    2976             852 :     collator = pg_ucol_open(loc_str);
    2977             848 :     ucol_close(collator);
    2978                 : #else                           /* not USE_ICU */
    2979                 :     /* could get here if a collation was created by a build with ICU */
    2980                 :     ereport(ERROR,
    2981                 :             (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
    2982                 :              errmsg("ICU is not supported in this build")));
    2983                 : #endif                          /* not USE_ICU */
    2984                 : }
    2985                 : 
    2986                 : /*
    2987                 :  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
    2988 ECB             :  * Therefore we keep them here rather than with the mbutils code.
    2989                 :  */
    2990                 : 
    2991                 : /*
    2992                 :  * wchar2char --- convert wide characters to multibyte format
    2993                 :  *
    2994                 :  * This has the same API as the standard wcstombs_l() function; in particular,
    2995                 :  * tolen is the maximum number of bytes to store at *to, and *from must be
    2996                 :  * zero-terminated.  The output will be zero-terminated iff there is room.
    2997                 :  */
    2998                 : size_t
    2999 GIC      136185 : wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
    3000 ECB             : {
    3001                 :     size_t      result;
    3002                 : 
    3003 GIC      136185 :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    3004                 : 
    3005 CBC      136185 :     if (tolen == 0)
    3006 UBC           0 :         return 0;
    3007                 : 
    3008                 : #ifdef WIN32
    3009                 : 
    3010                 :     /*
    3011                 :      * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
    3012 EUB             :      * for some reason mbstowcs and wcstombs won't do this for us, so we use
    3013                 :      * MultiByteToWideChar().
    3014                 :      */
    3015 ECB             :     if (GetDatabaseEncoding() == PG_UTF8)
    3016                 :     {
    3017                 :         result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
    3018                 :                                      NULL, NULL);
    3019                 :         /* A zero return is failure */
    3020                 :         if (result <= 0)
    3021                 :             result = -1;
    3022                 :         else
    3023                 :         {
    3024                 :             Assert(result <= tolen);
    3025                 :             /* Microsoft counts the zero terminator in the result */
    3026                 :             result--;
    3027                 :         }
    3028                 :     }
    3029                 :     else
    3030                 : #endif                          /* WIN32 */
    3031 GIC      136185 :     if (locale == (pg_locale_t) 0)
    3032                 :     {
    3033                 :         /* Use wcstombs directly for the default locale */
    3034 CBC      135993 :         result = wcstombs(to, from, tolen);
    3035                 :     }
    3036                 :     else
    3037                 :     {
    3038 ECB             : #ifdef HAVE_LOCALE_T
    3039                 : #ifdef HAVE_WCSTOMBS_L
    3040                 :         /* Use wcstombs_l for nondefault locales */
    3041                 :         result = wcstombs_l(to, from, tolen, locale->info.lt);
    3042                 : #else                           /* !HAVE_WCSTOMBS_L */
    3043                 :         /* We have to temporarily set the locale as current ... ugh */
    3044 GIC         192 :         locale_t    save_locale = uselocale(locale->info.lt);
    3045                 : 
    3046 GBC         192 :         result = wcstombs(to, from, tolen);
    3047                 : 
    3048 CBC         192 :         uselocale(save_locale);
    3049                 : #endif                          /* HAVE_WCSTOMBS_L */
    3050                 : #else                           /* !HAVE_LOCALE_T */
    3051                 :         /* Can't have locale != 0 without HAVE_LOCALE_T */
    3052                 :         elog(ERROR, "wcstombs_l is not available");
    3053                 :         result = 0;             /* keep compiler quiet */
    3054                 : #endif                          /* HAVE_LOCALE_T */
    3055                 :     }
    3056                 : 
    3057 GIC      136185 :     return result;
    3058                 : }
    3059                 : 
    3060                 : /*
    3061                 :  * char2wchar --- convert multibyte characters to wide characters
    3062                 :  *
    3063                 :  * This has almost the API of mbstowcs_l(), except that *from need not be
    3064                 :  * null-terminated; instead, the number of input bytes is specified as
    3065                 :  * fromlen.  Also, we ereport() rather than returning -1 for invalid
    3066                 :  * input encoding.  tolen is the maximum number of wchar_t's to store at *to.
    3067                 :  * The output will be zero-terminated iff there is room.
    3068                 :  */
    3069 ECB             : size_t
    3070 GIC      137763 : char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
    3071                 :            pg_locale_t locale)
    3072                 : {
    3073                 :     size_t      result;
    3074 ECB             : 
    3075 CBC      137763 :     Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
    3076                 : 
    3077          137763 :     if (tolen == 0)
    3078 LBC           0 :         return 0;
    3079                 : 
    3080                 : #ifdef WIN32
    3081                 :     /* See WIN32 "Unicode" comment above */
    3082 EUB             :     if (GetDatabaseEncoding() == PG_UTF8)
    3083                 :     {
    3084 ECB             :         /* Win32 API does not work for zero-length input */
    3085                 :         if (fromlen == 0)
    3086                 :             result = 0;
    3087                 :         else
    3088                 :         {
    3089 EUB             :             result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
    3090                 :             /* A zero return is failure */
    3091                 :             if (result == 0)
    3092                 :                 result = -1;
    3093                 :         }
    3094                 : 
    3095                 :         if (result != -1)
    3096                 :         {
    3097                 :             Assert(result < tolen);
    3098                 :             /* Append trailing null wchar (MultiByteToWideChar() does not) */
    3099                 :             to[result] = 0;
    3100                 :         }
    3101                 :     }
    3102                 :     else
    3103                 : #endif                          /* WIN32 */
    3104                 :     {
    3105                 :         /* mbstowcs requires ending '\0' */
    3106 GIC      137763 :         char       *str = pnstrdup(from, fromlen);
    3107                 : 
    3108          137763 :         if (locale == (pg_locale_t) 0)
    3109 EUB             :         {
    3110                 :             /* Use mbstowcs directly for the default locale */
    3111 GIC      137571 :             result = mbstowcs(to, str, tolen);
    3112                 :         }
    3113 EUB             :         else
    3114                 :         {
    3115                 : #ifdef HAVE_LOCALE_T
    3116                 : #ifdef HAVE_MBSTOWCS_L
    3117                 :             /* Use mbstowcs_l for nondefault locales */
    3118                 :             result = mbstowcs_l(to, str, tolen, locale->info.lt);
    3119                 : #else                           /* !HAVE_MBSTOWCS_L */
    3120                 :             /* We have to temporarily set the locale as current ... ugh */
    3121 GIC         192 :             locale_t    save_locale = uselocale(locale->info.lt);
    3122                 : 
    3123 GBC         192 :             result = mbstowcs(to, str, tolen);
    3124 EUB             : 
    3125 GIC         192 :             uselocale(save_locale);
    3126 EUB             : #endif                          /* HAVE_MBSTOWCS_L */
    3127                 : #else                           /* !HAVE_LOCALE_T */
    3128                 :             /* Can't have locale != 0 without HAVE_LOCALE_T */
    3129                 :             elog(ERROR, "mbstowcs_l is not available");
    3130                 :             result = 0;         /* keep compiler quiet */
    3131                 : #endif                          /* HAVE_LOCALE_T */
    3132                 :         }
    3133                 : 
    3134 GBC      137763 :         pfree(str);
    3135                 :     }
    3136                 : 
    3137 GIC      137763 :     if (result == -1)
    3138                 :     {
    3139                 :         /*
    3140                 :          * Invalid multibyte character encountered.  We try to give a useful
    3141 ECB             :          * error message by letting pg_verifymbstr check the string.  But it's
    3142                 :          * possible that the string is OK to us, and not OK to mbstowcs ---
    3143                 :          * this suggests that the LC_CTYPE locale is different from the
    3144                 :          * database encoding.  Give a generic error message if pg_verifymbstr
    3145                 :          * can't find anything wrong.
    3146                 :          */
    3147 UIC           0 :         pg_verifymbstr(from, fromlen, false);   /* might not return */
    3148                 :         /* but if it does ... */
    3149               0 :         ereport(ERROR,
    3150                 :                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
    3151 ECB             :                  errmsg("invalid multibyte character for locale"),
    3152                 :                  errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
    3153                 :     }
    3154                 : 
    3155 CBC      137763 :     return result;
    3156                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a