LCOV - differential code coverage report
Current view: top level - src/backend/regex - regc_pg_locale.c (source / functions) Coverage Total Hit UNC UIC UBC GNC CBC DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 54.9 % 315 173 2 1 139 1 172 3 1
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 15 15 2 13
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 1 1 1
Legend: Lines: hit not hit (240..) days: 54.8 % 314 172 2 1 139 172
Function coverage date bins:
(240..) days: 100.0 % 15 15 2 13

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * regc_pg_locale.c
                                  4                 :  *    ctype functions adapted to work on pg_wchar (a/k/a chr),
                                  5                 :  *    and functions to cache the results of wholesale ctype probing.
                                  6                 :  *
                                  7                 :  * This file is #included by regcomp.c; it's not meant to compile standalone.
                                  8                 :  *
                                  9                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
                                 10                 :  * Portions Copyright (c) 1994, Regents of the University of California
                                 11                 :  *
                                 12                 :  * IDENTIFICATION
                                 13                 :  *    src/backend/regex/regc_pg_locale.c
                                 14                 :  *
                                 15                 :  *-------------------------------------------------------------------------
                                 16                 :  */
                                 17                 : 
                                 18                 : #include "catalog/pg_collation.h"
                                 19                 : #include "utils/pg_locale.h"
                                 20                 : 
                                 21                 : /*
                                 22                 :  * To provide as much functionality as possible on a variety of platforms,
                                 23                 :  * without going so far as to implement everything from scratch, we use
                                 24                 :  * several implementation strategies depending on the situation:
                                 25                 :  *
                                 26                 :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
                                 27                 :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
                                 28                 :  * collations don't give a fig about multibyte characters.
                                 29                 :  *
                                 30                 :  * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
                                 31                 :  *
                                 32                 :  * 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
                                 33                 :  * This assumes that every platform uses Unicode codepoints directly
                                 34                 :  * as the wchar_t representation of Unicode.  On some platforms
                                 35                 :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
                                 36                 :  *
                                 37                 :  * 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
                                 38                 :  * values up to 255, and punt for values above that.  This is 100% correct
                                 39                 :  * only in single-byte encodings such as LATINn.  However, non-Unicode
                                 40                 :  * multibyte encodings are mostly Far Eastern character sets for which the
                                 41                 :  * properties being tested here aren't very relevant for higher code values
                                 42                 :  * anyway.  The difficulty with using the <wctype.h> functions with
                                 43                 :  * non-Unicode multibyte encodings is that we can have no certainty that
                                 44                 :  * the platform's wchar_t representation matches what we do in pg_wchar
                                 45                 :  * conversions.
                                 46                 :  *
                                 47                 :  * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
                                 48                 :  * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
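                                 49                 :  * functions, under exactly the same cases as #2.  (Collations whose provider is ICU are handled separately, using ICU's uchar.h functions; see PG_REGEX_LOCALE_ICU.)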
                                 50                 :  *
                                 51                 :  * There is one notable difference between cases 2 and 3: in the "default"
                                 52                 :  * collation we force ASCII letters to follow ASCII upcase/downcase rules,
                                 53                 :  * while in a non-default collation we just let the library functions do what
                                 54                 :  * they will.  The case where this matters is treatment of I/i in Turkish,
                                 55                 :  * and the behavior is meant to match the upper()/lower() SQL functions.
                                 56                 :  *
                                 57                 :  * We store the active collation setting in static variables.  In principle
                                 58                 :  * it could be passed down to here via the regex library's "struct vars" data
                                 59                 :  * structure; but that would require somewhat invasive changes in the regex
                                 60                 :  * library, and right now there's no real benefit to be gained from that.
                                 61                 :  *
                                 62                 :  * NB: the coding here assumes pg_wchar is an unsigned type.
                                 63                 :  */
                                 64                 : 
                                 65                 : typedef enum /* which implementation the pg_wc_* tests dispatch to */
                                 66                 : {
                                 67                 :     PG_REGEX_LOCALE_C,          /* C/POSIX ctype: hard-wired ASCII table, encoding independent */
                                 68                 :     PG_REGEX_LOCALE_WIDE,       /* Use <wctype.h> functions (UTF8 encoding, no locale object) */
                                 69                 :     PG_REGEX_LOCALE_1BYTE,      /* Use <ctype.h> functions (non-UTF8 encoding, no locale object) */
                                 70                 :     PG_REGEX_LOCALE_WIDE_L,     /* Use locale_t <wctype.h> functions (UTF8 encoding) */
                                 71                 :     PG_REGEX_LOCALE_1BYTE_L,    /* Use locale_t <ctype.h> functions (non-UTF8 encoding) */
                                 72                 :     PG_REGEX_LOCALE_ICU         /* Use ICU uchar.h functions (collation provider is ICU) */
                                 73                 : } PG_Locale_Strategy;
                                 74                 : 
                                 75                 : static PG_Locale_Strategy pg_regex_strategy; /* chosen by pg_set_regex_collation() */
                                 76                 : static pg_locale_t pg_regex_locale; /* locale object for the active collation; 0 when none applies */
                                 77                 : static Oid  pg_regex_collation; /* active collation; C_COLLATION_OID for every C-ctype collation */
                                 78                 : 
                                 79                 : /*
                                 80                 :  * Hard-wired character properties for C locale
                                 81                 :  */
                                 82                 : #define PG_ISDIGIT  0x01 /* '0'..'9' */
                                 83                 : #define PG_ISALPHA  0x02 /* 'A'..'Z' and 'a'..'z' */
                                 84                 : #define PG_ISALNUM  (PG_ISDIGIT | PG_ISALPHA) /* mask: either of the two bits above */
                                 85                 : #define PG_ISUPPER  0x04 /* 'A'..'Z' */
                                 86                 : #define PG_ISLOWER  0x08 /* 'a'..'z' */
                                 87                 : #define PG_ISGRAPH  0x10 /* '!'..'~' (printable, excluding space) */
                                 88                 : #define PG_ISPRINT  0x20 /* ' '..'~' */
                                 89                 : #define PG_ISPUNCT  0x40 /* graphic characters that are neither letters nor digits */
                                 90                 : #define PG_ISSPACE  0x80 /* ' ' and ^I..^M */
                                 91                 : 
                                 92                 : static const unsigned char pg_char_properties[128] = { /* PG_IS* bit set for each ASCII code 0..127; index with the code itself */
                                 93                 :      /* NUL */ 0,
                                 94                 :      /* ^A */ 0,
                                 95                 :      /* ^B */ 0,
                                 96                 :      /* ^C */ 0,
                                 97                 :      /* ^D */ 0,
                                 98                 :      /* ^E */ 0,
                                 99                 :      /* ^F */ 0,
                                100                 :      /* ^G */ 0,
                                101                 :      /* ^H */ 0,
                                102                 :      /* ^I */ PG_ISSPACE,
                                103                 :      /* ^J */ PG_ISSPACE,
                                104                 :      /* ^K */ PG_ISSPACE,
                                105                 :      /* ^L */ PG_ISSPACE,
                                106                 :      /* ^M */ PG_ISSPACE,
                                107                 :      /* ^N */ 0,
                                108                 :      /* ^O */ 0,
                                109                 :      /* ^P */ 0,
                                110                 :      /* ^Q */ 0,
                                111                 :      /* ^R */ 0,
                                112                 :      /* ^S */ 0,
                                113                 :      /* ^T */ 0,
                                114                 :      /* ^U */ 0,
                                115                 :      /* ^V */ 0,
                                116                 :      /* ^W */ 0,
                                117                 :      /* ^X */ 0,
                                118                 :      /* ^Y */ 0,
                                119                 :      /* ^Z */ 0,
                                120                 :      /* ^[ */ 0,
                                121                 :      /* ^\ */ 0,
                                122                 :      /* ^] */ 0,
                                123                 :      /* ^^ */ 0,
                                124                 :      /* ^_ */ 0,
                                125                 :      /* SP (space) */ PG_ISPRINT | PG_ISSPACE,
                                126                 :      /* !  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                127                 :      /* "  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                128                 :      /* #  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                129                 :      /* $  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                130                 :      /* %  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                131                 :      /* &  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                132                 :      /* '  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                133                 :      /* (  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                134                 :      /* )  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                135                 :      /* *  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                136                 :      /* +  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                137                 :      /* ,  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                138                 :      /* -  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                139                 :      /* .  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                140                 :      /* /  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                141                 :      /* 0  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                142                 :      /* 1  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                143                 :      /* 2  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                144                 :      /* 3  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                145                 :      /* 4  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                146                 :      /* 5  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                147                 :      /* 6  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                148                 :      /* 7  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                149                 :      /* 8  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                150                 :      /* 9  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                151                 :      /* :  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                152                 :      /* ;  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                153                 :      /* <  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                154                 :      /* =  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                155                 :      /* >  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                156                 :      /* ?  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                157                 :      /* @  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                158                 :      /* A  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                159                 :      /* B  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                160                 :      /* C  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                161                 :      /* D  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                162                 :      /* E  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                163                 :      /* F  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                164                 :      /* G  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                165                 :      /* H  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                166                 :      /* I  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                167                 :      /* J  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                168                 :      /* K  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                169                 :      /* L  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                170                 :      /* M  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                171                 :      /* N  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                172                 :      /* O  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                173                 :      /* P  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                174                 :      /* Q  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                175                 :      /* R  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                176                 :      /* S  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                177                 :      /* T  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                178                 :      /* U  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                179                 :      /* V  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                180                 :      /* W  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                181                 :      /* X  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                182                 :      /* Y  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                183                 :      /* Z  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                184                 :      /* [  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                185                 :      /* \  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                186                 :      /* ]  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                187                 :      /* ^  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                188                 :      /* _  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                189                 :      /* `  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                190                 :      /* a  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                191                 :      /* b  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                192                 :      /* c  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                193                 :      /* d  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                194                 :      /* e  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                195                 :      /* f  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                196                 :      /* g  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                197                 :      /* h  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                198                 :      /* i  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                199                 :      /* j  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                200                 :      /* k  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                201                 :      /* l  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                202                 :      /* m  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                203                 :      /* n  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                204                 :      /* o  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                205                 :      /* p  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                206                 :      /* q  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                207                 :      /* r  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                208                 :      /* s  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                209                 :      /* t  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                210                 :      /* u  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                211                 :      /* v  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                212                 :      /* w  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                213                 :      /* x  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                214                 :      /* y  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                215                 :      /* z  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                216                 :      /* {  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                217                 :      /* |  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                218                 :      /* }  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                219                 :      /* ~  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                220                 :      /* DEL */ 0
                                221                 : };
                                222                 : 
                                223                 : 
                                224                 : /*
                                225                 :  * pg_set_regex_collation: set collation for these functions to obey
                                226                 :  *
                                227                 :  * This is called when beginning compilation or execution of a regexp.
                                228                 :  * Since there's no need for reentrancy of regexp operations, it's okay
                                229                 :  * to store the results (pg_regex_strategy, pg_regex_locale, pg_regex_collation) in static variables.  Reports ERROR if the collation OID is invalid or the collation is nondeterministic.
                                230                 :  */
                                231                 : void
  4382 tgl                       232 CBC      912287 : pg_set_regex_collation(Oid collation)
                                233                 : {
   444 peter                     234          912287 :     if (!OidIsValid(collation))
                                235                 :     {
                                236                 :         /*
                                237                 :          * This typically means that the parser could not resolve a conflict
                                238                 :          * of implicit collations, so report it that way.
                                239                 :          */
   444 peter                     240 UBC           0 :         ereport(ERROR,
                                241                 :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
                                242                 :                  errmsg("could not determine which collation to use for regular expression"),
                                243                 :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
                                244                 :     }
                                245                 : 
  4382 tgl                       246 CBC      912287 :     if (lc_ctype_is_c(collation))
                                247                 :     {
                                248                 :         /* C/POSIX collations use this path regardless of database encoding */
                                249           62841 :         pg_regex_strategy = PG_REGEX_LOCALE_C;
                                250           62841 :         pg_regex_locale = 0;
  4067                           251           62841 :         pg_regex_collation = C_COLLATION_OID; /* recorded as C_COLLATION_OID whichever C-ctype collation was passed in */
                                252                 :     }
                                253                 :     else
                                254                 :     {
                                255                 :         /*
                                256                 :          * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T; the
                                257                 :          * case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not have to
                                258                 :          * be considered below.
                                259                 :          */
   444 peter                     260          849446 :         pg_regex_locale = pg_newlocale_from_collation(collation);
                                261                 : 
    45 jdavis                    262 GNC      849446 :         if (!pg_locale_deterministic(pg_regex_locale))
  1479 peter                     263 CBC          12 :             ereport(ERROR,
                                264                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                265                 :                      errmsg("nondeterministic collations are not supported for regular expressions")));
                                266                 : 
                                267                 : #ifdef USE_ICU
  2208 peter_e                   268          849434 :         if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
                                269          848999 :             pg_regex_strategy = PG_REGEX_LOCALE_ICU;
                                270                 :         else
                                271                 : #endif
  4382 tgl                       272             435 :         if (GetDatabaseEncoding() == PG_UTF8) /* not taken as ICU above: choose by encoding, then by whether a locale object exists */
                                273                 :         {
                                274             433 :             if (pg_regex_locale)
                                275             429 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
                                276                 :             else
                                277               4 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
                                278                 :         }
                                279                 :         else
                                280                 :         {
                                281               2 :             if (pg_regex_locale)
  4382 tgl                       282 UBC           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
                                283                 :             else
  4382 tgl                       284 CBC           2 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
                                285                 :         }
                                286                 : 
  4067                           287          849434 :         pg_regex_collation = collation;
                                288                 :     }
  4382                           289          912275 : }
                                290                 : 
                                291                 : static int
                                292           74885 : pg_wc_isdigit(pg_wchar c) /* returns nonzero if c is a digit according to the current pg_regex_strategy, else 0 */
                                293                 : {
                                294           74885 :     switch (pg_regex_strategy)
                                295                 :     {
                                296             943 :         case PG_REGEX_LOCALE_C:
                                297            1886 :             return (c <= (pg_wchar) 127 &&
                                298             943 :                     (pg_char_properties[c] & PG_ISDIGIT));
  4382 tgl                       299 UBC           0 :         case PG_REGEX_LOCALE_WIDE:
                                300                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                301               0 :                 return iswdigit((wint_t) c);
                                302                 :             /* FALL THRU: taken only when wchar_t is narrower than 4 bytes and c > 0xFFFF; the UCHAR_MAX range check below then decides (0 wherever UCHAR_MAX < 0xFFFF) */
                                303                 :         case PG_REGEX_LOCALE_1BYTE:
                                304               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                305               0 :                     isdigit((unsigned char) c));
  4382 tgl                       306 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                307                 : #ifdef HAVE_LOCALE_T
                                308                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
  2208 peter_e                   309            6144 :                 return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
                                310                 : #endif
                                311                 :             /* FALL THRU: taken when wchar_t is too narrow for c, or when HAVE_LOCALE_T is not defined */
                                312                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                313                 : #ifdef HAVE_LOCALE_T
  4382 tgl                       314 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
  2208 peter_e                   315               0 :                     isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
                                316                 : #endif
                                317                 :             break;
  2208 peter_e                   318 CBC       67798 :         case PG_REGEX_LOCALE_ICU:
                                319                 : #ifdef USE_ICU
                                320           67798 :             return u_isdigit(c);
                                321                 : #endif
                                322                 :             break;
                                323                 :     }
  4382 tgl                       324 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet (only the breaks above lead here, i.e. when the #ifdef'd code is compiled out) */
                                325                 : }
                                326                 : 
                                327                 : static int
 4382 tgl                       328 CBC       14475 : pg_wc_isalpha(pg_wchar c)
                                329                 : {
                                330           14475 :     switch (pg_regex_strategy)
                                331                 :     {
 4382 tgl                       332 UBC           0 :         case PG_REGEX_LOCALE_C:
                                333               0 :             return (c <= (pg_wchar) 127 &&
                                334               0 :                     (pg_char_properties[c] & PG_ISALPHA));
                                335               0 :         case PG_REGEX_LOCALE_WIDE:
                                336                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                337               0 :                 return iswalpha((wint_t) c);
                                338                 :             /* FALL THRU */
                                339                 :         case PG_REGEX_LOCALE_1BYTE:
                                340               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                341               0 :                     isalpha((unsigned char) c));
 4382 tgl                       342 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                343                 : #ifdef HAVE_LOCALE_T
                                344                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   345            6144 :                 return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
                                346                 : #endif
                                347                 :             /* FALL THRU */
                                348                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                349                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       350 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   351               0 :                     isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
                                352                 : #endif
                                353                 :             break;
 2208 peter_e                   354 CBC        8331 :         case PG_REGEX_LOCALE_ICU:
                                355                 : #ifdef USE_ICU
                                356            8331 :             return u_isalpha(c);
                                357                 : #endif
                                358                 :             break;
                                359                 :     }
 4382 tgl                       360 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                361                 : }
                                362                 : 
                                363                 : static int
 4382 tgl                       364 CBC       34506 : pg_wc_isalnum(pg_wchar c)
                                365                 : {
                                366           34506 :     switch (pg_regex_strategy)
                                367                 :     {
                                368             821 :         case PG_REGEX_LOCALE_C:
                                369            1642 :             return (c <= (pg_wchar) 127 &&
                                370             821 :                     (pg_char_properties[c] & PG_ISALNUM));
 4382 tgl                       371 UBC           0 :         case PG_REGEX_LOCALE_WIDE:
                                372                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                373               0 :                 return iswalnum((wint_t) c);
                                374                 :             /* FALL THRU */
                                375                 :         case PG_REGEX_LOCALE_1BYTE:
                                376               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                377               0 :                     isalnum((unsigned char) c));
 4382 tgl                       378 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                379                 : #ifdef HAVE_LOCALE_T
                                380                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   381            6144 :                 return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
                                382                 : #endif
                                383                 :             /* FALL THRU */
                                384                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                385                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       386 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   387               0 :                     isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
                                388                 : #endif
                                389                 :             break;
 2208 peter_e                   390 CBC       27541 :         case PG_REGEX_LOCALE_ICU:
                                391                 : #ifdef USE_ICU
                                392           27541 :             return u_isalnum(c);
                                393                 : #endif
                                394                 :             break;
                                395                 :     }
 4382 tgl                       396 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                397                 : }
                                398                 : 
                                399                 : static int
  773 tgl                       400 CBC       16769 : pg_wc_isword(pg_wchar c)
                                401                 : {
                                402                 :     /* We define word characters as alnum class plus underscore */
                                403           16769 :     if (c == CHR('_'))
                                404              11 :         return 1;
                                405           16758 :     return pg_wc_isalnum(c);
                                406                 : }
                                407                 : 
                                408                 : static int
 4382                           409           14344 : pg_wc_isupper(pg_wchar c)
                                410                 : {
                                411           14344 :     switch (pg_regex_strategy)
                                412                 :     {
 4382 tgl                       413 UBC           0 :         case PG_REGEX_LOCALE_C:
                                414               0 :             return (c <= (pg_wchar) 127 &&
                                415               0 :                     (pg_char_properties[c] & PG_ISUPPER));
                                416               0 :         case PG_REGEX_LOCALE_WIDE:
                                417                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                418               0 :                 return iswupper((wint_t) c);
                                419                 :             /* FALL THRU */
                                420                 :         case PG_REGEX_LOCALE_1BYTE:
                                421               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                422               0 :                     isupper((unsigned char) c));
 4382 tgl                       423 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                424                 : #ifdef HAVE_LOCALE_T
                                425                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   426            6144 :                 return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
                                427                 : #endif
                                428                 :             /* FALL THRU */
                                429                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                430                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       431 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   432               0 :                     isupper_l((unsigned char) c, pg_regex_locale->info.lt));
                                433                 : #endif
                                434                 :             break;
 2208 peter_e                   435 CBC        8200 :         case PG_REGEX_LOCALE_ICU:
                                436                 : #ifdef USE_ICU
                                437            8200 :             return u_isupper(c);
                                438                 : #endif
                                439                 :             break;
                                440                 :     }
 4382 tgl                       441 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                442                 : }
                                443                 : 
                                444                 : static int
 4382 tgl                       445 CBC       14339 : pg_wc_islower(pg_wchar c)
                                446                 : {
                                447           14339 :     switch (pg_regex_strategy)
                                448                 :     {
 4382 tgl                       449 UBC           0 :         case PG_REGEX_LOCALE_C:
                                450               0 :             return (c <= (pg_wchar) 127 &&
                                451               0 :                     (pg_char_properties[c] & PG_ISLOWER));
                                452               0 :         case PG_REGEX_LOCALE_WIDE:
                                453                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                454               0 :                 return iswlower((wint_t) c);
                                455                 :             /* FALL THRU */
                                456                 :         case PG_REGEX_LOCALE_1BYTE:
                                457               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                458               0 :                     islower((unsigned char) c));
 4382 tgl                       459 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                460                 : #ifdef HAVE_LOCALE_T
                                461                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   462            6144 :                 return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
                                463                 : #endif
                                464                 :             /* FALL THRU */
                                465                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                466                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       467 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   468               0 :                     islower_l((unsigned char) c, pg_regex_locale->info.lt));
                                469                 : #endif
                                470                 :             break;
 2208 peter_e                   471 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
                                472                 : #ifdef USE_ICU
                                473            8195 :             return u_islower(c);
                                474                 : #endif
                                475                 :             break;
                                476                 :     }
 4382 tgl                       477 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                478                 : }
                                479                 : 
                                480                 : static int
 4382 tgl                       481 CBC       14339 : pg_wc_isgraph(pg_wchar c)
                                482                 : {
                                483           14339 :     switch (pg_regex_strategy)
                                484                 :     {
 4382 tgl                       485 UBC           0 :         case PG_REGEX_LOCALE_C:
                                486               0 :             return (c <= (pg_wchar) 127 &&
                                487               0 :                     (pg_char_properties[c] & PG_ISGRAPH));
                                488               0 :         case PG_REGEX_LOCALE_WIDE:
                                489                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                490               0 :                 return iswgraph((wint_t) c);
                                491                 :             /* FALL THRU */
                                492                 :         case PG_REGEX_LOCALE_1BYTE:
                                493               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                494               0 :                     isgraph((unsigned char) c));
 4382 tgl                       495 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                496                 : #ifdef HAVE_LOCALE_T
                                497                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   498            6144 :                 return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
                                499                 : #endif
                                500                 :             /* FALL THRU */
                                501                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                502                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       503 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   504               0 :                     isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
                                505                 : #endif
                                506                 :             break;
 2208 peter_e                   507 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
                                508                 : #ifdef USE_ICU
                                509            8195 :             return u_isgraph(c);
                                510                 : #endif
                                511                 :             break;
                                512                 :     }
 4382 tgl                       513 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                514                 : }
                                515                 : 
                                516                 : static int
 4382 tgl                       517 CBC       14339 : pg_wc_isprint(pg_wchar c)
                                518                 : {
                                519           14339 :     switch (pg_regex_strategy)
                                520                 :     {
 4382 tgl                       521 UBC           0 :         case PG_REGEX_LOCALE_C:
                                522               0 :             return (c <= (pg_wchar) 127 &&
                                523               0 :                     (pg_char_properties[c] & PG_ISPRINT));
                                524               0 :         case PG_REGEX_LOCALE_WIDE:
                                525                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                526               0 :                 return iswprint((wint_t) c);
                                527                 :             /* FALL THRU */
                                528                 :         case PG_REGEX_LOCALE_1BYTE:
                                529               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                530               0 :                     isprint((unsigned char) c));
 4382 tgl                       531 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                532                 : #ifdef HAVE_LOCALE_T
                                533                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   534            6144 :                 return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
                                535                 : #endif
                                536                 :             /* FALL THRU */
                                537                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                538                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       539 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   540               0 :                     isprint_l((unsigned char) c, pg_regex_locale->info.lt));
                                541                 : #endif
                                542                 :             break;
 2208 peter_e                   543 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
                                544                 : #ifdef USE_ICU
                                545            8195 :             return u_isprint(c);
                                546                 : #endif
                                547                 :             break;
                                548                 :     }
 4382 tgl                       549 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                550                 : }
                                551                 : 
                                552                 : static int
 4382 tgl                       553 CBC       14339 : pg_wc_ispunct(pg_wchar c)
                                554                 : {
                                555           14339 :     switch (pg_regex_strategy)
                                556                 :     {
 4382 tgl                       557 UBC           0 :         case PG_REGEX_LOCALE_C:
                                558               0 :             return (c <= (pg_wchar) 127 &&
                                559               0 :                     (pg_char_properties[c] & PG_ISPUNCT));
                                560               0 :         case PG_REGEX_LOCALE_WIDE:
                                561                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                562               0 :                 return iswpunct((wint_t) c);
                                563                 :             /* FALL THRU */
                                564                 :         case PG_REGEX_LOCALE_1BYTE:
                                565               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                566               0 :                     ispunct((unsigned char) c));
 4382 tgl                       567 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                568                 : #ifdef HAVE_LOCALE_T
                                569                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   570            6144 :                 return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
                                571                 : #endif
                                572                 :             /* FALL THRU */
                                573                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                574                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       575 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   576               0 :                     ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
                                577                 : #endif
                                578                 :             break;
 2208 peter_e                   579 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
                                580                 : #ifdef USE_ICU
                                581            8195 :             return u_ispunct(c);
                                582                 : #endif
                                583                 :             break;
                                584                 :     }
 4382 tgl                       585 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                586                 : }
                                587                 : 
                                588                 : static int
 4382 tgl                       589 CBC       44337 : pg_wc_isspace(pg_wchar c)
                                590                 : {
                                591           44337 :     switch (pg_regex_strategy)
                                592                 :     {
 4382 tgl                       593 UBC           0 :         case PG_REGEX_LOCALE_C:
                                594               0 :             return (c <= (pg_wchar) 127 &&
                                595               0 :                     (pg_char_properties[c] & PG_ISSPACE));
                                596               0 :         case PG_REGEX_LOCALE_WIDE:
                                597                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                598               0 :                 return iswspace((wint_t) c);
                                599                 :             /* FALL THRU */
                                600                 :         case PG_REGEX_LOCALE_1BYTE:
                                601               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                602               0 :                     isspace((unsigned char) c));
 4382 tgl                       603 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
                                604                 : #ifdef HAVE_LOCALE_T
                                605                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   606            6144 :                 return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
                                607                 : #endif
                                608                 :             /* FALL THRU */
                                609                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                610                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       611 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2208 peter_e                   612               0 :                     isspace_l((unsigned char) c, pg_regex_locale->info.lt));
                                613                 : #endif
                                614                 :             break;
 2208 peter_e                   615 CBC       38193 :         case PG_REGEX_LOCALE_ICU:
                                616                 : #ifdef USE_ICU
                                617           38193 :             return u_isspace(c);
                                618                 : #endif
                                619                 :             break;
                                620                 :     }
 4382 tgl                       621 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                622                 : }
                                623                 : 
                                624                 : static pg_wchar
 4382 tgl                       625 CBC        5258 : pg_wc_toupper(pg_wchar c)
                                626                 : {
                                627            5258 :     switch (pg_regex_strategy)
                                628                 :     {
                                629             537 :         case PG_REGEX_LOCALE_C:
                                630             537 :             if (c <= (pg_wchar) 127)
                                631             537 :                 return pg_ascii_toupper((unsigned char) c);
 4382 tgl                       632 UBC           0 :             return c;
                                633               0 :         case PG_REGEX_LOCALE_WIDE:
                                634                 :             /* force C behavior for ASCII characters, per comments above */
                                635               0 :             if (c <= (pg_wchar) 127)
                                636               0 :                 return pg_ascii_toupper((unsigned char) c);
                                637                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                638               0 :                 return towupper((wint_t) c);
                                639                 :             /* FALL THRU */
                                640                 :         case PG_REGEX_LOCALE_1BYTE:
                                641                 :             /* force C behavior for ASCII characters, per comments above */
                                642               0 :             if (c <= (pg_wchar) 127)
                                643               0 :                 return pg_ascii_toupper((unsigned char) c);
                                644               0 :             if (c <= (pg_wchar) UCHAR_MAX)
                                645               0 :                 return toupper((unsigned char) c);
                                646               0 :             return c;
 4382 tgl                       647 CBC          54 :         case PG_REGEX_LOCALE_WIDE_L:
                                648                 : #ifdef HAVE_LOCALE_T
                                649                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   650              54 :                 return towupper_l((wint_t) c, pg_regex_locale->info.lt);
                                651                 : #endif
                                652                 :             /* FALL THRU */
                                653                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                654                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       655 UBC           0 :             if (c <= (pg_wchar) UCHAR_MAX)
 2208 peter_e                   656               0 :                 return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
                                657                 : #endif
 4382 tgl                       658               0 :             return c;
 2208 peter_e                   659 CBC        4667 :         case PG_REGEX_LOCALE_ICU:
                                660                 : #ifdef USE_ICU
                                661            4667 :             return u_toupper(c);
                                662                 : #endif
                                663                 :             break;
                                664                 :     }
 4382 tgl                       665 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                666                 : }
                                667                 : 
                                668                 : static pg_wchar
 4382 tgl                       669 CBC        5260 : pg_wc_tolower(pg_wchar c)
                                670                 : {
                                671            5260 :     switch (pg_regex_strategy)
                                672                 :     {
                                673             537 :         case PG_REGEX_LOCALE_C:
                                674             537 :             if (c <= (pg_wchar) 127)
                                675             537 :                 return pg_ascii_tolower((unsigned char) c);
 4382 tgl                       676 UBC           0 :             return c;
                                677               0 :         case PG_REGEX_LOCALE_WIDE:
                                678                 :             /* force C behavior for ASCII characters, per comments above */
                                679               0 :             if (c <= (pg_wchar) 127)
                                680               0 :                 return pg_ascii_tolower((unsigned char) c);
                                681                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                682               0 :                 return towlower((wint_t) c);
                                683                 :             /* FALL THRU */
                                684                 :         case PG_REGEX_LOCALE_1BYTE:
                                685                 :             /* force C behavior for ASCII characters, per comments above */
                                686               0 :             if (c <= (pg_wchar) 127)
                                687               0 :                 return pg_ascii_tolower((unsigned char) c);
                                688               0 :             if (c <= (pg_wchar) UCHAR_MAX)
                                689               0 :                 return tolower((unsigned char) c);
                                690               0 :             return c;
 4382 tgl                       691 CBC          54 :         case PG_REGEX_LOCALE_WIDE_L:
                                692                 : #ifdef HAVE_LOCALE_T
                                693                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2208 peter_e                   694              54 :                 return towlower_l((wint_t) c, pg_regex_locale->info.lt);
                                695                 : #endif
                                696                 :             /* FALL THRU */
                                697                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                698                 : #ifdef HAVE_LOCALE_T
 4382 tgl                       699 UBC           0 :             if (c <= (pg_wchar) UCHAR_MAX)
 2208 peter_e                   700               0 :                 return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
                                701                 : #endif
 4382 tgl                       702               0 :             return c;
 2208 peter_e                   703 CBC        4669 :         case PG_REGEX_LOCALE_ICU:
                                704                 : #ifdef USE_ICU
                                705            4669 :             return u_tolower(c);
                                706                 : #endif
                                707                 :             break;
                                708                 :     }
 4382 tgl                       709 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                710                 : }
                                711                 : 
                                712                 : 
                                713                 : /*
                                714                 :  * These functions cache the results of probing libc's ctype behavior for
                                715                 :  * all character codes of interest in a given encoding/collation.  The
                                716                 :  * result is provided as a "struct cvec", but notice that the representation
                                717                 :  * is a touch different from a cvec created by regc_cvec.c: we allocate the
                                718                 :  * chrs[] and ranges[] arrays separately from the struct so that we can
                                719                 :  * realloc them larger at need.  This is okay since the cvecs made here
                                720                 :  * should never be freed by freecvec().
                                721                 :  *
                                722                 :  * We use malloc not palloc since we mustn't lose control on out-of-memory;
                                723                 :  * the main regex code expects us to return a failure indication instead.
                                724                 :  */
                                725                 : 
                                726                 : typedef int (*pg_wc_probefunc) (pg_wchar c); /* character test; a nonzero result counts as a match */
                                727                 : 
                                728                 : typedef struct pg_ctype_cache /* one cached probe result, looked up by (probefunc, collation) */
                                729                 : {
                                730                 :     pg_wc_probefunc probefunc;  /* pg_wc_isalpha or a sibling */
                                731                 :     Oid         collation;      /* collation this entry is for */
                                732                 :     struct cvec cv;             /* cache entry contents; chrs[] and ranges[] are malloc'd separately */
                                733                 :     struct pg_ctype_cache *next;    /* chain link */
                                734                 : } pg_ctype_cache;
                                735                 : 
                                736                 : static pg_ctype_cache *pg_ctype_cache_list = NULL; /* head of the cache chain; new entries are pushed at the front */
                                737                 : 
                                738                 : /*
                                739                 :  * Add a chr or range to pcc->cv; return false if we run out of memory
                                                    :  *
                                                    :  * chr1 is the first code of a run of nchrs consecutive matching codes.  A run
                                                    :  * of two or more is appended to cv.ranges[] as an inclusive (low, high) pair;
                                                    :  * a single code is appended to cv.chrs[].  If the target array is full, its
                                                    :  * capacity is doubled with realloc() first.
                                                    :  *
                                                    :  * NOTE(review): the capacity field is doubled before the realloc() result is
                                                    :  * checked, so after a failure it no longer matches the real allocation.  The
                                                    :  * callers visible in this file discard the whole entry on failure.
                                740                 :  */
                                741                 : static bool
 4067 tgl                       742 CBC        5617 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
                                743                 : {
                                744                 :     chr        *newchrs;
                                745                 : 
                                746            5617 :     if (nchrs > 1) /* run of two or more codes: store as a range */
                                747                 :     {
                                748            1731 :         if (pcc->cv.nranges >= pcc->cv.rangespace)
                                749                 :         {
 4067 tgl                       750 UBC           0 :             pcc->cv.rangespace *= 2;
                                751               0 :             newchrs = (chr *) realloc(pcc->cv.ranges,
                                752               0 :                                       pcc->cv.rangespace * sizeof(chr) * 2); /* two chrs per range */
                                753               0 :             if (newchrs == NULL)
                                754               0 :                 return false;
                                755               0 :             pcc->cv.ranges = newchrs;
                                756                 :         }
 4067 tgl                       757 CBC        1731 :         pcc->cv.ranges[pcc->cv.nranges * 2] = chr1; /* low end */
                                758            1731 :         pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1; /* high end, inclusive */
                                759            1731 :         pcc->cv.nranges++;
                                760                 :     }
                                761                 :     else
                                762                 :     {
                                763            3886 :         assert(nchrs == 1); /* the callers in this file only pass nmatches > 0 */
                                764            3886 :         if (pcc->cv.nchrs >= pcc->cv.chrspace)
                                765                 :         {
                                766              14 :             pcc->cv.chrspace *= 2;
                                767              14 :             newchrs = (chr *) realloc(pcc->cv.chrs,
                                768              14 :                                       pcc->cv.chrspace * sizeof(chr));
                                769              14 :             if (newchrs == NULL)
 4067 tgl                       770 UBC           0 :                 return false;
 4067 tgl                       771 CBC          14 :             pcc->cv.chrs = newchrs;
                                772                 :         }
                                773            3886 :         pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
                                774                 :     }
                                775            5617 :     return true;
                                776                 : }
                                777                 : 
                                778                 : /*
                                779                 :  * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
                                780                 :  * chrs satisfying the probe function.  The active collation is the one
                                781                 :  * previously set by pg_set_regex_collation.  Return NULL if out of memory.
                                782                 :  *
                                783                 :  * Note that the result must not be freed or modified by caller.
                                                    :  *
                                                    :  * probefunc is called once for every chr code from 0 up to a limit chosen
                                                    :  * from pg_regex_strategy; each maximal run of matching codes is recorded
                                                    :  * through store_match().
                                                    :  *
                                                    :  * cclasscode is copied into the result's cclasscode field, except that the
                                                    :  * field is set to -1 when the scan limit comes from the locale strategy
                                                    :  * rather than from MAX_SIMPLE_CHR.
                                                    :  *
                                                    :  * A finished result is linked into pg_ctype_cache_list, and a later call
                                                    :  * with the same probefunc and pg_regex_collation returns that same cvec.
                                                    :  * On failure the partly built entry is freed and the list is left unchanged.
                                784                 :  */
                                785                 : static struct cvec *
 2407                           786             363 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
                                787                 : {
                                788                 :     pg_ctype_cache *pcc;
                                789                 :     pg_wchar    max_chr;
                                790                 :     pg_wchar    cur_chr;
                                791                 :     int         nmatches;
                                792                 :     chr        *newchrs;
                                793                 : 
                                794                 :     /*
                                795                 :      * Do we already have the answer cached?
                                796                 :      */
 4067                           797             919 :     for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
                                798                 :     {
                                799             794 :         if (pcc->probefunc == probefunc &&
                                800             262 :             pcc->collation == pg_regex_collation)
                                801             238 :             return &pcc->cv; /* cache hit */
                                802                 :     }
                                803                 : 
                                804                 :     /*
                                805                 :      * Nope, so initialize some workspace ...
                                                    :      *
                                                    :      * Start with room for 128 single chrs and 64 ranges; store_match()
                                                    :      * enlarges the arrays as needed.  The two array allocations are checked
                                                    :      * together: the cleanup code frees both pointers, and free() accepts
                                                    :      * NULL.
                                806                 :      */
                                807             125 :     pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
                                808             125 :     if (pcc == NULL)
 4067 tgl                       809 UBC           0 :         return NULL;
 4067 tgl                       810 CBC         125 :     pcc->probefunc = probefunc;
                                811             125 :     pcc->collation = pg_regex_collation;
                                812             125 :     pcc->cv.nchrs = 0;
                                813             125 :     pcc->cv.chrspace = 128;
                                814             125 :     pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
                                815             125 :     pcc->cv.nranges = 0;
                                816             125 :     pcc->cv.rangespace = 64;
                                817             125 :     pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
                                818             125 :     if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
 4067 tgl                       819 UBC           0 :         goto out_of_memory;
 2407 tgl                       820 CBC         125 :     pcc->cv.cclasscode = cclasscode;
                                821                 : 
                                822                 :     /*
                                823                 :      * Decide how many character codes we ought to look through.  In general
                                824                 :      * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
                                825                 :      * runtime using the "high colormap" mechanism.  However, in C locale
                                826                 :      * there's no need to go further than 127, and if we only have a 1-byte
                                827                 :      * <ctype.h> API there's no need to go further than that can handle.
                                828                 :      *
                                829                 :      * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
                                830                 :      * output cvec as not having any locale-dependent behavior, since there
                                831                 :      * will be no need to do any run-time locale checks.  (The #if's here
                                832                 :      * would always be true for production values of MAX_SIMPLE_CHR, but it's
                                833                 :      * useful to allow it to be small for testing purposes.)
                                834                 :      */
 4067                           835             125 :     switch (pg_regex_strategy)
                                836                 :     {
                                837              10 :         case PG_REGEX_LOCALE_C:
                                838                 : #if MAX_SIMPLE_CHR >= 127
                                839              10 :             max_chr = (pg_wchar) 127;
 2407                           840              10 :             pcc->cv.cclasscode = -1;
                                841                 : #else
                                842                 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
                                843                 : #endif
 4067                           844              10 :             break;
                                845              27 :         case PG_REGEX_LOCALE_WIDE:
                                846                 :         case PG_REGEX_LOCALE_WIDE_L:
 2407                           847              27 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 4067                           848              27 :             break;
 4067 tgl                       849 UBC           0 :         case PG_REGEX_LOCALE_1BYTE:
                                850                 :         case PG_REGEX_LOCALE_1BYTE_L:
                                851                 : #if MAX_SIMPLE_CHR >= UCHAR_MAX
                                852               0 :             max_chr = (pg_wchar) UCHAR_MAX;
 2407                           853               0 :             pcc->cv.cclasscode = -1;
                                854                 : #else
                                855                 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
                                856                 : #endif
 4067                           857               0 :             break;
 2208 peter_e                   858 CBC          88 :         case PG_REGEX_LOCALE_ICU:
                                859              88 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
                                860              88 :             break;
 4067 tgl                       861 UBC           0 :         default:
                                862               0 :             max_chr = 0;        /* can't get here, but keep compiler quiet */
                                863               0 :             break;
                                864                 :     }
                                865                 : 
                                866                 :     /*
                                867                 :      * And scan 'em ...
                                                    :      *
                                                    :      * nmatches counts the current run of consecutive matching codes.  A run
                                                    :      * is handed to store_match() when the first non-matching code is seen;
                                                    :      * at that point the run began at cur_chr - nmatches.  The check after
                                                    :      * the loop flushes a run that was still open when the limit was reached.
                                868                 :      */
 4067 tgl                       869 CBC         125 :     nmatches = 0;               /* number of consecutive matches */
                                870                 : 
                                871          236925 :     for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
                                872                 :     {
                                873          236800 :         if ((*probefunc) (cur_chr))
                                874           71695 :             nmatches++;
                                875          165105 :         else if (nmatches > 0)
                                876                 :         {
                                877            5600 :             if (!store_match(pcc, cur_chr - nmatches, nmatches))
 4067 tgl                       878 UBC           0 :                 goto out_of_memory;
 4067 tgl                       879 CBC        5600 :             nmatches = 0;
                                880                 :         }
                                881                 :     }
                                882                 : 
                                883             125 :     if (nmatches > 0)
                                884              17 :         if (!store_match(pcc, cur_chr - nmatches, nmatches))
 4067 tgl                       885 UBC           0 :             goto out_of_memory;
                                886                 : 
                                887                 :     /*
                                888                 :      * We might have allocated more memory than needed; if so, free it
                                                    :      *
                                                    :      * An array that ended up empty is freed and its pointer set to NULL;
                                                    :      * otherwise it is realloc'd down to exactly the used size.
                                                    :      *
                                                    :      * NOTE(review): a failed shrinking realloc() is treated as
                                                    :      * out-of-memory, which throws away the otherwise complete entry.
                                889                 :      */
 4067 tgl                       890 CBC         125 :     if (pcc->cv.nchrs == 0)
                                891                 :     {
                                892              43 :         free(pcc->cv.chrs);
                                893              43 :         pcc->cv.chrs = NULL;
                                894              43 :         pcc->cv.chrspace = 0;
                                895                 :     }
                                896              82 :     else if (pcc->cv.nchrs < pcc->cv.chrspace)
                                897                 :     {
                                898              82 :         newchrs = (chr *) realloc(pcc->cv.chrs,
                                899              82 :                                   pcc->cv.nchrs * sizeof(chr));
                                900              82 :         if (newchrs == NULL)
 4067 tgl                       901 UBC           0 :             goto out_of_memory;
 4067 tgl                       902 CBC          82 :         pcc->cv.chrs = newchrs;
                                903              82 :         pcc->cv.chrspace = pcc->cv.nchrs;
                                904                 :     }
                                905             125 :     if (pcc->cv.nranges == 0)
                                906                 :     {
 4067 tgl                       907 UBC           0 :         free(pcc->cv.ranges);
                                908               0 :         pcc->cv.ranges = NULL;
                                909               0 :         pcc->cv.rangespace = 0;
                                910                 :     }
 4067 tgl                       911 CBC         125 :     else if (pcc->cv.nranges < pcc->cv.rangespace)
                                912                 :     {
                                913             125 :         newchrs = (chr *) realloc(pcc->cv.ranges,
                                914             125 :                                   pcc->cv.nranges * sizeof(chr) * 2);
                                915             125 :         if (newchrs == NULL)
 4067 tgl                       916 UBC           0 :             goto out_of_memory;
 4067 tgl                       917 CBC         125 :         pcc->cv.ranges = newchrs;
                                918             125 :         pcc->cv.rangespace = pcc->cv.nranges;
                                919                 :     }
                                920                 : 
                                921                 :     /*
                                922                 :      * Success, link it into cache chain
                                                    :      * (pushed at the head of pg_ctype_cache_list)
                                923                 :      */
                                924             125 :     pcc->next = pg_ctype_cache_list;
                                925             125 :     pg_ctype_cache_list = pcc;
                                926                 : 
                                927             125 :     return &pcc->cv;
                                928                 : 
                                929                 :     /*
                                930                 :      * Failure, clean up
                                                    :      *
                                                    :      * Every jump here happens before the entry is linked into the cache
                                                    :      * chain, so only the entry and its two arrays need to be released.
                                931                 :      */
 4067 tgl                       932 UBC           0 : out_of_memory:
  297 peter                     933 UNC           0 :     free(pcc->cv.chrs);
                                934               0 :     free(pcc->cv.ranges);
 4067 tgl                       935 UBC           0 :     free(pcc);
                                936                 : 
 4067 tgl                       937 UIC           0 :     return NULL;
                                938                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a