LCOV - differential code coverage report
Current view: top level - src/backend/regex - regc_pg_locale.c (source / functions) Coverage Total Hit UNC UIC UBC GNC CBC DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 54.9 % 315 173 2 1 139 1 172 3 1
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 15 15 2 13
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * regc_pg_locale.c
       4                 :  *    ctype functions adapted to work on pg_wchar (a/k/a chr),
       5                 :  *    and functions to cache the results of wholesale ctype probing.
       6                 :  *
       7                 :  * This file is #included by regcomp.c; it's not meant to compile standalone.
       8                 :  *
       9                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      10                 :  * Portions Copyright (c) 1994, Regents of the University of California
      11                 :  *
      12                 :  * IDENTIFICATION
      13                 :  *    src/backend/regex/regc_pg_locale.c
      14                 :  *
      15                 :  *-------------------------------------------------------------------------
      16                 :  */
      17                 : 
      18                 : #include "catalog/pg_collation.h"
      19                 : #include "utils/pg_locale.h"
      20                 : 
      21                 : /*
      22                 :  * To provide as much functionality as possible on a variety of platforms,
      23                 :  * without going so far as to implement everything from scratch, we use
      24                 :  * several implementation strategies depending on the situation:
      25                 :  *
      26                 :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
      27                 :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
      28                 :  * collations don't give a fig about multibyte characters.
      29                 :  *
      30                 :  * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
      31                 :  *
      32                 :  * 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
      33                 :  * This assumes that every platform uses Unicode codepoints directly
      34                 :  * as the wchar_t representation of Unicode.  On some platforms
      35                 :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
      36                 :  *
      37                 :  * 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
      38                 :  * values up to 255, and punt for values above that.  This is 100% correct
      39                 :  * only in single-byte encodings such as LATINn.  However, non-Unicode
      40                 :  * multibyte encodings are mostly Far Eastern character sets for which the
      41                 :  * properties being tested here aren't very relevant for higher code values
      42                 :  * anyway.  The difficulty with using the <wctype.h> functions with
      43                 :  * non-Unicode multibyte encodings is that we can have no certainty that
      44                 :  * the platform's wchar_t representation matches what we do in pg_wchar
      45                 :  * conversions.
      46                 :  *
      47                 :  * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
      48                 :  * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
      49                 :  * functions, under exactly the same cases as #2.
      50                 :  *
      51                 :  * There is one notable difference between cases 2 and 3: in the "default"
      52                 :  * collation we force ASCII letters to follow ASCII upcase/downcase rules,
      53                 :  * while in a non-default collation we just let the library functions do what
      54                 :  * they will.  The case where this matters is treatment of I/i in Turkish,
      55                 :  * and the behavior is meant to match the upper()/lower() SQL functions.
      56                 :  *
      57                 :  * We store the active collation setting in static variables.  In principle
      58                 :  * it could be passed down to here via the regex library's "struct vars" data
      59                 :  * structure; but that would require somewhat invasive changes in the regex
      60                 :  * library, and right now there's no real benefit to be gained from that.
      61                 :  *
      62                 :  * NB: the coding here assumes pg_wchar is an unsigned type.
      63                 :  */
      64                 : 
      65                 : typedef enum
      66                 : {
      67                 :     PG_REGEX_LOCALE_C,          /* C locale (encoding independent) */
      68                 :     PG_REGEX_LOCALE_WIDE,       /* Use <wctype.h> functions */
      69                 :     PG_REGEX_LOCALE_1BYTE,      /* Use <ctype.h> functions */
      70                 :     PG_REGEX_LOCALE_WIDE_L,     /* Use locale_t <wctype.h> functions */
      71                 :     PG_REGEX_LOCALE_1BYTE_L,    /* Use locale_t <ctype.h> functions */
      72                 :     PG_REGEX_LOCALE_ICU         /* Use ICU uchar.h functions */
      73                 : } PG_Locale_Strategy;
      74                 : 
      75                 : static PG_Locale_Strategy pg_regex_strategy;
      76                 : static pg_locale_t pg_regex_locale;
      77                 : static Oid  pg_regex_collation;
      78                 : 
      79                 : /*
      80                 :  * Hard-wired character properties for C locale
      81                 :  */
      82                 : #define PG_ISDIGIT  0x01
      83                 : #define PG_ISALPHA  0x02
      84                 : #define PG_ISALNUM  (PG_ISDIGIT | PG_ISALPHA)
      85                 : #define PG_ISUPPER  0x04
      86                 : #define PG_ISLOWER  0x08
      87                 : #define PG_ISGRAPH  0x10
      88                 : #define PG_ISPRINT  0x20
      89                 : #define PG_ISPUNCT  0x40
      90                 : #define PG_ISSPACE  0x80
      91                 : 
      92                 : static const unsigned char pg_char_properties[128] = {
      93                 :      /* NUL */ 0,
      94                 :      /* ^A */ 0,
      95                 :      /* ^B */ 0,
      96                 :      /* ^C */ 0,
      97                 :      /* ^D */ 0,
      98                 :      /* ^E */ 0,
      99                 :      /* ^F */ 0,
     100                 :      /* ^G */ 0,
     101                 :      /* ^H */ 0,
     102                 :      /* ^I */ PG_ISSPACE,
     103                 :      /* ^J */ PG_ISSPACE,
     104                 :      /* ^K */ PG_ISSPACE,
     105                 :      /* ^L */ PG_ISSPACE,
     106                 :      /* ^M */ PG_ISSPACE,
     107                 :      /* ^N */ 0,
     108                 :      /* ^O */ 0,
     109                 :      /* ^P */ 0,
     110                 :      /* ^Q */ 0,
     111                 :      /* ^R */ 0,
     112                 :      /* ^S */ 0,
     113                 :      /* ^T */ 0,
     114                 :      /* ^U */ 0,
     115                 :      /* ^V */ 0,
     116                 :      /* ^W */ 0,
     117                 :      /* ^X */ 0,
     118                 :      /* ^Y */ 0,
     119                 :      /* ^Z */ 0,
     120                 :      /* ^[ */ 0,
     121                 :      /* ^\ */ 0,
     122                 :      /* ^] */ 0,
     123                 :      /* ^^ */ 0,
     124                 :      /* ^_ */ 0,
     125                 :      /* */ PG_ISPRINT | PG_ISSPACE,
     126                 :      /* !  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     127                 :      /* "  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     128                 :      /* #  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     129                 :      /* $  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     130                 :      /* %  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     131                 :      /* &  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     132                 :      /* '  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     133                 :      /* (  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     134                 :      /* )  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     135                 :      /* *  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     136                 :      /* +  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     137                 :      /* ,  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     138                 :      /* -  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     139                 :      /* .  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     140                 :      /* /  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     141                 :      /* 0  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     142                 :      /* 1  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     143                 :      /* 2  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     144                 :      /* 3  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     145                 :      /* 4  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     146                 :      /* 5  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     147                 :      /* 6  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     148                 :      /* 7  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     149                 :      /* 8  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     150                 :      /* 9  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
     151                 :      /* :  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     152                 :      /* ;  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     153                 :      /* <  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     154                 :      /* =  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     155                 :      /* >  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     156                 :      /* ?  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     157                 :      /* @  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     158                 :      /* A  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     159                 :      /* B  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     160                 :      /* C  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     161                 :      /* D  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     162                 :      /* E  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     163                 :      /* F  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     164                 :      /* G  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     165                 :      /* H  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     166                 :      /* I  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     167                 :      /* J  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     168                 :      /* K  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     169                 :      /* L  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     170                 :      /* M  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     171                 :      /* N  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     172                 :      /* O  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     173                 :      /* P  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     174                 :      /* Q  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     175                 :      /* R  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     176                 :      /* S  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     177                 :      /* T  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     178                 :      /* U  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     179                 :      /* V  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     180                 :      /* W  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     181                 :      /* X  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     182                 :      /* Y  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     183                 :      /* Z  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
     184                 :      /* [  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     185                 :      /* \  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     186                 :      /* ]  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     187                 :      /* ^  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     188                 :      /* _  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     189                 :      /* `  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     190                 :      /* a  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     191                 :      /* b  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     192                 :      /* c  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     193                 :      /* d  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     194                 :      /* e  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     195                 :      /* f  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     196                 :      /* g  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     197                 :      /* h  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     198                 :      /* i  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     199                 :      /* j  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     200                 :      /* k  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     201                 :      /* l  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     202                 :      /* m  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     203                 :      /* n  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     204                 :      /* o  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     205                 :      /* p  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     206                 :      /* q  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     207                 :      /* r  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     208                 :      /* s  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     209                 :      /* t  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     210                 :      /* u  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     211                 :      /* v  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     212                 :      /* w  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     213                 :      /* x  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     214                 :      /* y  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     215                 :      /* z  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
     216                 :      /* {  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     217                 :      /* |  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     218                 :      /* }  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     219                 :      /* ~  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
     220                 :      /* DEL */ 0
     221                 : };
     222                 : 
     223                 : 
     224                 : /*
     225                 :  * pg_set_regex_collation: set collation for these functions to obey
     226                 :  *
     227                 :  * This is called when beginning compilation or execution of a regexp.
     228                 :  * Since there's no need for reentrancy of regexp operations, it's okay
     229                 :  * to store the results in static variables.
     230                 :  */
     231                 : void
     232 CBC      912287 : pg_set_regex_collation(Oid collation)
     233                 : {
     234          912287 :     if (!OidIsValid(collation))
     235                 :     {
     236                 :         /*
     237                 :          * This typically means that the parser could not resolve a conflict
     238                 :          * of implicit collations, so report it that way.
     239                 :          */
     240 UBC           0 :         ereport(ERROR,
     241                 :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
     242                 :                  errmsg("could not determine which collation to use for regular expression"),
     243                 :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
     244                 :     }
     245                 : 
     246 CBC      912287 :     if (lc_ctype_is_c(collation))
     247                 :     {
     248                 :         /* C/POSIX collations use this path regardless of database encoding */
     249           62841 :         pg_regex_strategy = PG_REGEX_LOCALE_C;
     250           62841 :         pg_regex_locale = 0;
     251           62841 :         pg_regex_collation = C_COLLATION_OID;
     252                 :     }
     253                 :     else
     254                 :     {
     255                 :         /*
     256                 :          * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T; the
     257                 :          * case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not have to
     258                 :          * be considered below.
     259                 :          */
     260          849446 :         pg_regex_locale = pg_newlocale_from_collation(collation);
     261                 : 
     262 GNC      849446 :         if (!pg_locale_deterministic(pg_regex_locale))
     263 CBC          12 :             ereport(ERROR,
     264                 :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
     265                 :                      errmsg("nondeterministic collations are not supported for regular expressions")));
     266                 : 
     267                 : #ifdef USE_ICU
     268          849434 :         if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
     269          848999 :             pg_regex_strategy = PG_REGEX_LOCALE_ICU;
     270                 :         else
     271                 : #endif
     272             435 :         if (GetDatabaseEncoding() == PG_UTF8)
     273                 :         {
     274             433 :             if (pg_regex_locale)
     275             429 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
     276                 :             else
     277               4 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
     278                 :         }
     279                 :         else
     280                 :         {
     281               2 :             if (pg_regex_locale)
     282 UBC           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
     283                 :             else
     284 CBC           2 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
     285                 :         }
     286                 : 
     287          849434 :         pg_regex_collation = collation;
     288                 :     }
     289          912275 : }
     290                 : 
     291                 : static int
     292           74885 : pg_wc_isdigit(pg_wchar c)
     293                 : {
     294           74885 :     switch (pg_regex_strategy)
     295                 :     {
     296             943 :         case PG_REGEX_LOCALE_C:
     297            1886 :             return (c <= (pg_wchar) 127 &&
     298             943 :                     (pg_char_properties[c] & PG_ISDIGIT));
     299 UBC           0 :         case PG_REGEX_LOCALE_WIDE:
     300                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     301               0 :                 return iswdigit((wint_t) c);
     302                 :             /* FALL THRU */
     303                 :         case PG_REGEX_LOCALE_1BYTE:
     304               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     305               0 :                     isdigit((unsigned char) c));
     306 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     307                 : #ifdef HAVE_LOCALE_T
     308                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     309            6144 :                 return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
     310                 : #endif
     311                 :             /* FALL THRU */
     312                 :         case PG_REGEX_LOCALE_1BYTE_L:
     313                 : #ifdef HAVE_LOCALE_T
     314 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     315               0 :                     isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
     316                 : #endif
     317                 :             break;
     318 CBC       67798 :         case PG_REGEX_LOCALE_ICU:
     319                 : #ifdef USE_ICU
     320           67798 :             return u_isdigit(c);
     321                 : #endif
     322                 :             break;
     323                 :     }
     324 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     325                 : }
     326                 : 
     327                 : static int
     328 CBC       14475 : pg_wc_isalpha(pg_wchar c)
     329                 : {
     330           14475 :     switch (pg_regex_strategy)
     331                 :     {
     332 UBC           0 :         case PG_REGEX_LOCALE_C:
     333               0 :             return (c <= (pg_wchar) 127 &&
     334               0 :                     (pg_char_properties[c] & PG_ISALPHA));
     335               0 :         case PG_REGEX_LOCALE_WIDE:
     336                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     337               0 :                 return iswalpha((wint_t) c);
     338                 :             /* FALL THRU */
     339                 :         case PG_REGEX_LOCALE_1BYTE:
     340               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     341               0 :                     isalpha((unsigned char) c));
     342 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     343                 : #ifdef HAVE_LOCALE_T
     344                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     345            6144 :                 return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
     346                 : #endif
     347                 :             /* FALL THRU */
     348                 :         case PG_REGEX_LOCALE_1BYTE_L:
     349                 : #ifdef HAVE_LOCALE_T
     350 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     351               0 :                     isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
     352                 : #endif
     353                 :             break;
     354 CBC        8331 :         case PG_REGEX_LOCALE_ICU:
     355                 : #ifdef USE_ICU
     356            8331 :             return u_isalpha(c);
     357                 : #endif
     358                 :             break;
     359                 :     }
     360 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     361                 : }
     362                 : 
     363                 : static int
     364 CBC       34506 : pg_wc_isalnum(pg_wchar c)
     365                 : {
     366           34506 :     switch (pg_regex_strategy)
     367                 :     {
     368             821 :         case PG_REGEX_LOCALE_C:
     369            1642 :             return (c <= (pg_wchar) 127 &&
     370             821 :                     (pg_char_properties[c] & PG_ISALNUM));
     371 UBC           0 :         case PG_REGEX_LOCALE_WIDE:
     372                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     373               0 :                 return iswalnum((wint_t) c);
     374                 :             /* FALL THRU */
     375                 :         case PG_REGEX_LOCALE_1BYTE:
     376               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     377               0 :                     isalnum((unsigned char) c));
     378 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     379                 : #ifdef HAVE_LOCALE_T
     380                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     381            6144 :                 return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
     382                 : #endif
     383                 :             /* FALL THRU */
     384                 :         case PG_REGEX_LOCALE_1BYTE_L:
     385                 : #ifdef HAVE_LOCALE_T
     386 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     387               0 :                     isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
     388                 : #endif
     389                 :             break;
     390 CBC       27541 :         case PG_REGEX_LOCALE_ICU:
     391                 : #ifdef USE_ICU
     392           27541 :             return u_isalnum(c);
     393                 : #endif
     394                 :             break;
     395                 :     }
     396 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     397                 : }
     398                 : 
     399                 : static int
     400 CBC       16769 : pg_wc_isword(pg_wchar c)
     401                 : {
     402                 :     /* We define word characters as alnum class plus underscore */
     403           16769 :     if (c == CHR('_'))
     404              11 :         return 1;
     405           16758 :     return pg_wc_isalnum(c);
     406                 : }
     407                 : 
     408                 : static int
     409           14344 : pg_wc_isupper(pg_wchar c)
     410                 : {
     411           14344 :     switch (pg_regex_strategy)
     412                 :     {
     413 UBC           0 :         case PG_REGEX_LOCALE_C:
     414               0 :             return (c <= (pg_wchar) 127 &&
     415               0 :                     (pg_char_properties[c] & PG_ISUPPER));
     416               0 :         case PG_REGEX_LOCALE_WIDE:
     417                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     418               0 :                 return iswupper((wint_t) c);
     419                 :             /* FALL THRU */
     420                 :         case PG_REGEX_LOCALE_1BYTE:
     421               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     422               0 :                     isupper((unsigned char) c));
     423 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     424                 : #ifdef HAVE_LOCALE_T
     425                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     426            6144 :                 return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
     427                 : #endif
     428                 :             /* FALL THRU */
     429                 :         case PG_REGEX_LOCALE_1BYTE_L:
     430                 : #ifdef HAVE_LOCALE_T
     431 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     432               0 :                     isupper_l((unsigned char) c, pg_regex_locale->info.lt));
     433                 : #endif
     434                 :             break;
     435 CBC        8200 :         case PG_REGEX_LOCALE_ICU:
     436                 : #ifdef USE_ICU
     437            8200 :             return u_isupper(c);
     438                 : #endif
     439                 :             break;
     440                 :     }
     441 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     442                 : }
     443                 : 
     444                 : static int
     445 CBC       14339 : pg_wc_islower(pg_wchar c)
     446                 : {
     447           14339 :     switch (pg_regex_strategy)
     448                 :     {
     449 UBC           0 :         case PG_REGEX_LOCALE_C:
     450               0 :             return (c <= (pg_wchar) 127 &&
     451               0 :                     (pg_char_properties[c] & PG_ISLOWER));
     452               0 :         case PG_REGEX_LOCALE_WIDE:
     453                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     454               0 :                 return iswlower((wint_t) c);
     455                 :             /* FALL THRU */
     456                 :         case PG_REGEX_LOCALE_1BYTE:
     457               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     458               0 :                     islower((unsigned char) c));
     459 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     460                 : #ifdef HAVE_LOCALE_T
     461                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     462            6144 :                 return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
     463                 : #endif
     464                 :             /* FALL THRU */
     465                 :         case PG_REGEX_LOCALE_1BYTE_L:
     466                 : #ifdef HAVE_LOCALE_T
     467 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     468               0 :                     islower_l((unsigned char) c, pg_regex_locale->info.lt));
     469                 : #endif
     470                 :             break;
     471 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
     472                 : #ifdef USE_ICU
     473            8195 :             return u_islower(c);
     474                 : #endif
     475                 :             break;
     476                 :     }
     477 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     478                 : }
     479                 : 
     480                 : static int
     481 CBC       14339 : pg_wc_isgraph(pg_wchar c)
     482                 : {
     483           14339 :     switch (pg_regex_strategy)
     484                 :     {
     485 UBC           0 :         case PG_REGEX_LOCALE_C:
     486               0 :             return (c <= (pg_wchar) 127 &&
     487               0 :                     (pg_char_properties[c] & PG_ISGRAPH));
     488               0 :         case PG_REGEX_LOCALE_WIDE:
     489                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     490               0 :                 return iswgraph((wint_t) c);
     491                 :             /* FALL THRU */
     492                 :         case PG_REGEX_LOCALE_1BYTE:
     493               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     494               0 :                     isgraph((unsigned char) c));
     495 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     496                 : #ifdef HAVE_LOCALE_T
     497                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     498            6144 :                 return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
     499                 : #endif
     500                 :             /* FALL THRU */
     501                 :         case PG_REGEX_LOCALE_1BYTE_L:
     502                 : #ifdef HAVE_LOCALE_T
     503 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     504               0 :                     isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
     505                 : #endif
     506                 :             break;
     507 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
     508                 : #ifdef USE_ICU
     509            8195 :             return u_isgraph(c);
     510                 : #endif
     511                 :             break;
     512                 :     }
     513 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     514                 : }
     515                 : 
     516                 : static int
     517 CBC       14339 : pg_wc_isprint(pg_wchar c)
     518                 : {
     519           14339 :     switch (pg_regex_strategy)
     520                 :     {
     521 UBC           0 :         case PG_REGEX_LOCALE_C:
     522               0 :             return (c <= (pg_wchar) 127 &&
     523               0 :                     (pg_char_properties[c] & PG_ISPRINT));
     524               0 :         case PG_REGEX_LOCALE_WIDE:
     525                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     526               0 :                 return iswprint((wint_t) c);
     527                 :             /* FALL THRU */
     528                 :         case PG_REGEX_LOCALE_1BYTE:
     529               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     530               0 :                     isprint((unsigned char) c));
     531 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     532                 : #ifdef HAVE_LOCALE_T
     533                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     534            6144 :                 return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
     535                 : #endif
     536                 :             /* FALL THRU */
     537                 :         case PG_REGEX_LOCALE_1BYTE_L:
     538                 : #ifdef HAVE_LOCALE_T
     539 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     540               0 :                     isprint_l((unsigned char) c, pg_regex_locale->info.lt));
     541                 : #endif
     542                 :             break;
     543 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
     544                 : #ifdef USE_ICU
     545            8195 :             return u_isprint(c);
     546                 : #endif
     547                 :             break;
     548                 :     }
     549 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     550                 : }
     551                 : 
     552                 : static int
     553 CBC       14339 : pg_wc_ispunct(pg_wchar c)
     554                 : {
     555           14339 :     switch (pg_regex_strategy)
     556                 :     {
     557 UBC           0 :         case PG_REGEX_LOCALE_C:
     558               0 :             return (c <= (pg_wchar) 127 &&
     559               0 :                     (pg_char_properties[c] & PG_ISPUNCT));
     560               0 :         case PG_REGEX_LOCALE_WIDE:
     561                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     562               0 :                 return iswpunct((wint_t) c);
     563                 :             /* FALL THRU */
     564                 :         case PG_REGEX_LOCALE_1BYTE:
     565               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     566               0 :                     ispunct((unsigned char) c));
     567 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     568                 : #ifdef HAVE_LOCALE_T
     569                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     570            6144 :                 return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
     571                 : #endif
     572                 :             /* FALL THRU */
     573                 :         case PG_REGEX_LOCALE_1BYTE_L:
     574                 : #ifdef HAVE_LOCALE_T
     575 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     576               0 :                     ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
     577                 : #endif
     578                 :             break;
     579 CBC        8195 :         case PG_REGEX_LOCALE_ICU:
     580                 : #ifdef USE_ICU
     581            8195 :             return u_ispunct(c);
     582                 : #endif
     583                 :             break;
     584                 :     }
     585 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     586                 : }
     587                 : 
     588                 : static int
     589 CBC       44337 : pg_wc_isspace(pg_wchar c)
     590                 : {
     591           44337 :     switch (pg_regex_strategy)
     592                 :     {
     593 UBC           0 :         case PG_REGEX_LOCALE_C:
     594               0 :             return (c <= (pg_wchar) 127 &&
     595               0 :                     (pg_char_properties[c] & PG_ISSPACE));
     596               0 :         case PG_REGEX_LOCALE_WIDE:
     597                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     598               0 :                 return iswspace((wint_t) c);
     599                 :             /* FALL THRU */
     600                 :         case PG_REGEX_LOCALE_1BYTE:
     601               0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     602               0 :                     isspace((unsigned char) c));
     603 CBC        6144 :         case PG_REGEX_LOCALE_WIDE_L:
     604                 : #ifdef HAVE_LOCALE_T
     605                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     606            6144 :                 return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
     607                 : #endif
     608                 :             /* FALL THRU */
     609                 :         case PG_REGEX_LOCALE_1BYTE_L:
     610                 : #ifdef HAVE_LOCALE_T
     611 UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
     612               0 :                     isspace_l((unsigned char) c, pg_regex_locale->info.lt));
     613                 : #endif
     614                 :             break;
     615 CBC       38193 :         case PG_REGEX_LOCALE_ICU:
     616                 : #ifdef USE_ICU
     617           38193 :             return u_isspace(c);
     618                 : #endif
     619                 :             break;
     620                 :     }
     621 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     622                 : }
     623                 : 
     624                 : static pg_wchar
     625 CBC        5258 : pg_wc_toupper(pg_wchar c)
     626                 : {
     627            5258 :     switch (pg_regex_strategy)
     628                 :     {
     629             537 :         case PG_REGEX_LOCALE_C:
     630             537 :             if (c <= (pg_wchar) 127)
     631             537 :                 return pg_ascii_toupper((unsigned char) c);
     632 UBC           0 :             return c;
     633               0 :         case PG_REGEX_LOCALE_WIDE:
     634                 :             /* force C behavior for ASCII characters, per comments above */
     635               0 :             if (c <= (pg_wchar) 127)
     636               0 :                 return pg_ascii_toupper((unsigned char) c);
     637                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     638               0 :                 return towupper((wint_t) c);
     639                 :             /* FALL THRU */
     640                 :         case PG_REGEX_LOCALE_1BYTE:
     641                 :             /* force C behavior for ASCII characters, per comments above */
     642               0 :             if (c <= (pg_wchar) 127)
     643               0 :                 return pg_ascii_toupper((unsigned char) c);
     644               0 :             if (c <= (pg_wchar) UCHAR_MAX)
     645               0 :                 return toupper((unsigned char) c);
     646               0 :             return c;
     647 CBC          54 :         case PG_REGEX_LOCALE_WIDE_L:
     648                 : #ifdef HAVE_LOCALE_T
     649                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     650              54 :                 return towupper_l((wint_t) c, pg_regex_locale->info.lt);
     651                 : #endif
     652                 :             /* FALL THRU */
     653                 :         case PG_REGEX_LOCALE_1BYTE_L:
     654                 : #ifdef HAVE_LOCALE_T
     655 UBC           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     656               0 :                 return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
     657                 : #endif
     658               0 :             return c;
     659 CBC        4667 :         case PG_REGEX_LOCALE_ICU:
     660                 : #ifdef USE_ICU
     661            4667 :             return u_toupper(c);
     662                 : #endif
     663                 :             break;
     664                 :     }
     665 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     666                 : }
     667                 : 
     668                 : static pg_wchar
     669 CBC        5260 : pg_wc_tolower(pg_wchar c)
     670                 : {
     671            5260 :     switch (pg_regex_strategy)
     672                 :     {
     673             537 :         case PG_REGEX_LOCALE_C:
     674             537 :             if (c <= (pg_wchar) 127)
     675             537 :                 return pg_ascii_tolower((unsigned char) c);
     676 UBC           0 :             return c;
     677               0 :         case PG_REGEX_LOCALE_WIDE:
     678                 :             /* force C behavior for ASCII characters, per comments above */
     679               0 :             if (c <= (pg_wchar) 127)
     680               0 :                 return pg_ascii_tolower((unsigned char) c);
     681                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     682               0 :                 return towlower((wint_t) c);
     683                 :             /* FALL THRU */
     684                 :         case PG_REGEX_LOCALE_1BYTE:
     685                 :             /* force C behavior for ASCII characters, per comments above */
     686               0 :             if (c <= (pg_wchar) 127)
     687               0 :                 return pg_ascii_tolower((unsigned char) c);
     688               0 :             if (c <= (pg_wchar) UCHAR_MAX)
     689               0 :                 return tolower((unsigned char) c);
     690               0 :             return c;
     691 CBC          54 :         case PG_REGEX_LOCALE_WIDE_L:
     692                 : #ifdef HAVE_LOCALE_T
     693                 :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
     694              54 :                 return towlower_l((wint_t) c, pg_regex_locale->info.lt);
     695                 : #endif
     696                 :             /* FALL THRU */
     697                 :         case PG_REGEX_LOCALE_1BYTE_L:
     698                 : #ifdef HAVE_LOCALE_T
     699 UBC           0 :             if (c <= (pg_wchar) UCHAR_MAX)
     700               0 :                 return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
     701                 : #endif
     702               0 :             return c;
     703 CBC        4669 :         case PG_REGEX_LOCALE_ICU:
     704                 : #ifdef USE_ICU
     705            4669 :             return u_tolower(c);
     706                 : #endif
     707                 :             break;
     708                 :     }
     709 UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
     710                 : }
     711                 : 
     712                 : 
     713                 : /*
     714                 :  * These functions cache the results of probing libc's ctype behavior for
     715                 :  * all character codes of interest in a given encoding/collation.  The
     716                 :  * result is provided as a "struct cvec", but notice that the representation
     717                 :  * is a touch different from a cvec created by regc_cvec.c: we allocate the
     718                 :  * chrs[] and ranges[] arrays separately from the struct so that we can
     719                 :  * realloc them larger at need.  This is okay since the cvecs made here
     720                 :  * should never be freed by freecvec().
     721                 :  *
     722                 :  * We use malloc not palloc since we mustn't lose control on out-of-memory;
     723                 :  * the main regex code expects us to return a failure indication instead.
     724                 :  */
     725                 : 
     726                 : typedef int (*pg_wc_probefunc) (pg_wchar c);
     727                 : 
     728                 : typedef struct pg_ctype_cache
     729                 : {
     730                 :     pg_wc_probefunc probefunc;  /* pg_wc_isalpha or a sibling */
     731                 :     Oid         collation;      /* collation this entry is for */
     732                 :     struct cvec cv;             /* cache entry contents */
     733                 :     struct pg_ctype_cache *next;    /* chain link */
     734                 : } pg_ctype_cache;
     735                 : 
     736                 : static pg_ctype_cache *pg_ctype_cache_list = NULL;
     737                 : 
     738                 : /*
     739                 :  * Add a chr or range to pcc->cv; return false if run out of memory
     740                 :  */
     741                 : static bool
     742 CBC        5617 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
     743                 : {
     744                 :     chr        *newchrs;
     745                 : 
     746            5617 :     if (nchrs > 1)
     747                 :     {
     748            1731 :         if (pcc->cv.nranges >= pcc->cv.rangespace)
     749                 :         {
     750 UBC           0 :             pcc->cv.rangespace *= 2;
     751               0 :             newchrs = (chr *) realloc(pcc->cv.ranges,
     752               0 :                                       pcc->cv.rangespace * sizeof(chr) * 2);
     753               0 :             if (newchrs == NULL)
     754               0 :                 return false;
     755               0 :             pcc->cv.ranges = newchrs;
     756                 :         }
     757 CBC        1731 :         pcc->cv.ranges[pcc->cv.nranges * 2] = chr1;
     758            1731 :         pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1;
     759            1731 :         pcc->cv.nranges++;
     760                 :     }
     761                 :     else
     762                 :     {
     763            3886 :         assert(nchrs == 1);
     764            3886 :         if (pcc->cv.nchrs >= pcc->cv.chrspace)
     765                 :         {
     766              14 :             pcc->cv.chrspace *= 2;
     767              14 :             newchrs = (chr *) realloc(pcc->cv.chrs,
     768              14 :                                       pcc->cv.chrspace * sizeof(chr));
     769              14 :             if (newchrs == NULL)
     770 UBC           0 :                 return false;
     771 CBC          14 :             pcc->cv.chrs = newchrs;
     772                 :         }
     773            3886 :         pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
     774                 :     }
     775            5617 :     return true;
     776                 : }
     777                 : 
     778                 : /*
     779                 :  * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
     780                 :  * chrs satisfying the probe function.  The active collation is the one
     781                 :  * previously set by pg_set_regex_collation.  Return NULL if out of memory.
     782                 :  *
     783                 :  * Note that the result must not be freed or modified by caller.
     784                 :  */
     785                 : static struct cvec *
     786             363 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
     787                 : {
     788                 :     pg_ctype_cache *pcc;
     789                 :     pg_wchar    max_chr;
     790                 :     pg_wchar    cur_chr;
     791                 :     int         nmatches;
     792                 :     chr        *newchrs;
     793                 : 
     794                 :     /*
     795                 :      * Do we already have the answer cached?
     796                 :      */
     797             919 :     for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
     798                 :     {
     799             794 :         if (pcc->probefunc == probefunc &&
     800             262 :             pcc->collation == pg_regex_collation)
     801             238 :             return &pcc->cv;
     802                 :     }
     803                 : 
     804                 :     /*
     805                 :      * Nope, so initialize some workspace ...
     806                 :      */
     807             125 :     pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
     808             125 :     if (pcc == NULL)
     809 UBC           0 :         return NULL;
     810 CBC         125 :     pcc->probefunc = probefunc;
     811             125 :     pcc->collation = pg_regex_collation;
     812             125 :     pcc->cv.nchrs = 0;
     813             125 :     pcc->cv.chrspace = 128;
     814             125 :     pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
     815             125 :     pcc->cv.nranges = 0;
     816             125 :     pcc->cv.rangespace = 64;
     817             125 :     pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
     818             125 :     if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
     819 UBC           0 :         goto out_of_memory;
     820 CBC         125 :     pcc->cv.cclasscode = cclasscode;
     821                 : 
     822                 :     /*
     823                 :      * Decide how many character codes we ought to look through.  In general
     824                 :      * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
     825                 :      * runtime using the "high colormap" mechanism.  However, in C locale
     826                 :      * there's no need to go further than 127, and if we only have a 1-byte
     827                 :      * <ctype.h> API there's no need to go further than that can handle.
     828                 :      *
     829                 :      * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
     830                 :      * output cvec as not having any locale-dependent behavior, since there
     831                 :      * will be no need to do any run-time locale checks.  (The #if's here
     832                 :      * would always be true for production values of MAX_SIMPLE_CHR, but it's
     833                 :      * useful to allow it to be small for testing purposes.)
     834                 :      */
     835             125 :     switch (pg_regex_strategy)
     836                 :     {
     837              10 :         case PG_REGEX_LOCALE_C:
     838                 : #if MAX_SIMPLE_CHR >= 127
     839              10 :             max_chr = (pg_wchar) 127;
     840              10 :             pcc->cv.cclasscode = -1;
     841                 : #else
     842                 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     843                 : #endif
     844              10 :             break;
     845              27 :         case PG_REGEX_LOCALE_WIDE:
     846                 :         case PG_REGEX_LOCALE_WIDE_L:
     847              27 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     848              27 :             break;
     849 UBC           0 :         case PG_REGEX_LOCALE_1BYTE:
     850                 :         case PG_REGEX_LOCALE_1BYTE_L:
     851                 : #if MAX_SIMPLE_CHR >= UCHAR_MAX
     852               0 :             max_chr = (pg_wchar) UCHAR_MAX;
     853               0 :             pcc->cv.cclasscode = -1;
     854                 : #else
     855                 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     856                 : #endif
     857               0 :             break;
     858 CBC          88 :         case PG_REGEX_LOCALE_ICU:
     859              88 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
     860              88 :             break;
     861 UBC           0 :         default:
     862               0 :             max_chr = 0;        /* can't get here, but keep compiler quiet */
     863               0 :             break;
     864                 :     }
     865                 : 
     866                 :     /*
     867                 :      * And scan 'em ...
     868                 :      */
     869 CBC         125 :     nmatches = 0;               /* number of consecutive matches */
     870                 : 
     871          236925 :     for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
     872                 :     {
     873          236800 :         if ((*probefunc) (cur_chr))
     874           71695 :             nmatches++;
     875          165105 :         else if (nmatches > 0)
     876                 :         {
     877            5600 :             if (!store_match(pcc, cur_chr - nmatches, nmatches))
     878 UBC           0 :                 goto out_of_memory;
     879 CBC        5600 :             nmatches = 0;
     880                 :         }
     881                 :     }
     882                 : 
     883             125 :     if (nmatches > 0)
     884              17 :         if (!store_match(pcc, cur_chr - nmatches, nmatches))
     885 UBC           0 :             goto out_of_memory;
     886                 : 
     887                 :     /*
     888                 :      * We might have allocated more memory than needed, if so free it
     889                 :      */
     890 CBC         125 :     if (pcc->cv.nchrs == 0)
     891                 :     {
     892              43 :         free(pcc->cv.chrs);
     893              43 :         pcc->cv.chrs = NULL;
     894              43 :         pcc->cv.chrspace = 0;
     895                 :     }
     896              82 :     else if (pcc->cv.nchrs < pcc->cv.chrspace)
     897                 :     {
     898              82 :         newchrs = (chr *) realloc(pcc->cv.chrs,
     899              82 :                                   pcc->cv.nchrs * sizeof(chr));
     900              82 :         if (newchrs == NULL)
     901 UBC           0 :             goto out_of_memory;
     902 CBC          82 :         pcc->cv.chrs = newchrs;
     903              82 :         pcc->cv.chrspace = pcc->cv.nchrs;
     904                 :     }
     905             125 :     if (pcc->cv.nranges == 0)
     906                 :     {
     907 UBC           0 :         free(pcc->cv.ranges);
     908               0 :         pcc->cv.ranges = NULL;
     909               0 :         pcc->cv.rangespace = 0;
     910                 :     }
     911 CBC         125 :     else if (pcc->cv.nranges < pcc->cv.rangespace)
     912                 :     {
     913             125 :         newchrs = (chr *) realloc(pcc->cv.ranges,
     914             125 :                                   pcc->cv.nranges * sizeof(chr) * 2);
     915             125 :         if (newchrs == NULL)
     916 UBC           0 :             goto out_of_memory;
     917 CBC         125 :         pcc->cv.ranges = newchrs;
     918             125 :         pcc->cv.rangespace = pcc->cv.nranges;
     919                 :     }
     920                 : 
     921                 :     /*
     922                 :      * Success, link it into cache chain
     923                 :      */
     924             125 :     pcc->next = pg_ctype_cache_list;
     925             125 :     pg_ctype_cache_list = pcc;
     926                 : 
     927             125 :     return &pcc->cv;
     928                 : 
     929                 :     /*
     930                 :      * Failure, clean up
     931                 :      */
     932 UBC           0 : out_of_memory:
     933 UNC           0 :     free(pcc->cv.chrs);
     934               0 :     free(pcc->cv.ranges);
     935 UBC           0 :     free(pcc);
     936                 : 
     937 UIC           0 :     return NULL;
     938                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a