LCOV - differential code coverage report
Current view: top level - src/backend/regex - regc_pg_locale.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GNC CBC EUB DUB
Current: Differential Code Coverage 16@8cea358b128 vs 17@8cea358b128 Lines: 57.8 % 341 197 8 136 21 176 2 1
Current Date: 2024-04-14 14:21:10 Functions: 100.0 % 15 15 13 2
Baseline: 16@8cea358b128 Branches: 34.4 % 294 101 1 15 12 165 26 1 74
Baseline Date: 2024-04-14 14:21:09 Line coverage date bins:
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed [..60] days: 72.4 % 29 21 8 21
(240..) days: 56.4 % 312 176 136 176 2
Function coverage date bins:
(240..) days: 100.0 % 15 15 13 2
Branch coverage date bins:
[..60] days: 50.0 % 2 1 1 1
(240..) days: 34.2 % 292 100 15 12 165 26 74

 Age         Owner                    Branch data    TLA  Line data    Source code
                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * regc_pg_locale.c
                                  4                 :                :  *    ctype functions adapted to work on pg_wchar (a/k/a chr),
                                  5                 :                :  *    and functions to cache the results of wholesale ctype probing.
                                  6                 :                :  *
                                  7                 :                :  * This file is #included by regcomp.c; it's not meant to compile standalone.
                                  8                 :                :  *
                                  9                 :                :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
                                 10                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                 11                 :                :  *
                                 12                 :                :  * IDENTIFICATION
                                 13                 :                :  *    src/backend/regex/regc_pg_locale.c
                                 14                 :                :  *
                                 15                 :                :  *-------------------------------------------------------------------------
                                 16                 :                :  */
                                 17                 :                : 
                                 18                 :                : #include "catalog/pg_collation.h"
                                 19                 :                : #include "common/unicode_case.h"
                                 20                 :                : #include "common/unicode_category.h"
                                 21                 :                : #include "utils/pg_locale.h"
                                 22                 :                : 
                                 23                 :                : /*
                                 24                 :                :  * To provide as much functionality as possible on a variety of platforms,
                                 25                 :                :  * without going so far as to implement everything from scratch, we use
                                 26                 :                :  * several implementation strategies depending on the situation:
                                 27                 :                :  *
                                 28                 :                :  * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
                                 29                 :                :  * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
                                 30                 :                :  * collations don't give a fig about multibyte characters.
                                 31                 :                :  *
                                 32                 :                :  * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
                                 33                 :                :  *
                                 34                 :                :  * 2a. When working in UTF8 encoding, we use the <wctype.h> functions.
                                 35                 :                :  * This assumes that every platform uses Unicode codepoints directly
                                 36                 :                :  * as the wchar_t representation of Unicode.  On some platforms
                                 37                 :                :  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
                                 38                 :                :  *
                                 39                 :                :  * 2b. In all other encodings, we use the <ctype.h> functions for pg_wchar
                                 40                 :                :  * values up to 255, and punt for values above that.  This is 100% correct
                                 41                 :                :  * only in single-byte encodings such as LATINn.  However, non-Unicode
                                 42                 :                :  * multibyte encodings are mostly Far Eastern character sets for which the
                                 43                 :                :  * properties being tested here aren't very relevant for higher code values
                                 44                 :                :  * anyway.  The difficulty with using the <wctype.h> functions with
                                 45                 :                :  * non-Unicode multibyte encodings is that we can have no certainty that
                                 46                 :                :  * the platform's wchar_t representation matches what we do in pg_wchar
                                 47                 :                :  * conversions.
                                 48                 :                :  *
                                 49                 :                :  * 3. In non-default collations, we use the locale_t-extended forms of the
                                 50                 :                :  * <wctype.h> and <ctype.h> functions, under exactly the same cases as #2.
                                 51                 :                :  *
                                 52                 :                :  * There is one notable difference between cases 2 and 3: in the "default"
                                 53                 :                :  * collation we force ASCII letters to follow ASCII upcase/downcase rules,
                                 54                 :                :  * while in a non-default collation we just let the library functions do what
                                 55                 :                :  * they will.  The case where this matters is treatment of I/i in Turkish,
                                 56                 :                :  * and the behavior is meant to match the upper()/lower() SQL functions.
                                 57                 :                :  *
                                 58                 :                :  * We store the active collation setting in static variables.  In principle
                                 59                 :                :  * it could be passed down to here via the regex library's "struct vars" data
                                 60                 :                :  * structure; but that would require somewhat invasive changes in the regex
                                 61                 :                :  * library, and right now there's no real benefit to be gained from that.
                                 62                 :                :  *
                                 63                 :                :  * NB: the coding here assumes pg_wchar is an unsigned type.
                                 64                 :                :  */
                                 65                 :                : 
                                 66                 :                : typedef enum
                                 67                 :                : {
                                 68                 :                :     PG_REGEX_LOCALE_C,          /* C locale (encoding independent) */
                                 69                 :                :     PG_REGEX_BUILTIN,           /* built-in Unicode semantics */
                                 70                 :                :     PG_REGEX_LOCALE_WIDE,       /* Use <wctype.h> functions */
                                 71                 :                :     PG_REGEX_LOCALE_1BYTE,      /* Use <ctype.h> functions */
                                 72                 :                :     PG_REGEX_LOCALE_WIDE_L,     /* Use locale_t <wctype.h> functions */
                                 73                 :                :     PG_REGEX_LOCALE_1BYTE_L,    /* Use locale_t <ctype.h> functions */
                                 74                 :                :     PG_REGEX_LOCALE_ICU,        /* Use ICU uchar.h functions */
                                 75                 :                : } PG_Locale_Strategy;
                                 76                 :                : 
                                 77                 :                : static PG_Locale_Strategy pg_regex_strategy;
                                 78                 :                : static pg_locale_t pg_regex_locale;
                                 79                 :                : static Oid  pg_regex_collation;
                                 80                 :                : 
                                 81                 :                : /*
                                 82                 :                :  * Hard-wired character properties for C locale
                                 83                 :                :  */
                                 84                 :                : #define PG_ISDIGIT  0x01
                                 85                 :                : #define PG_ISALPHA  0x02
                                 86                 :                : #define PG_ISALNUM  (PG_ISDIGIT | PG_ISALPHA)
                                 87                 :                : #define PG_ISUPPER  0x04
                                 88                 :                : #define PG_ISLOWER  0x08
                                 89                 :                : #define PG_ISGRAPH  0x10
                                 90                 :                : #define PG_ISPRINT  0x20
                                 91                 :                : #define PG_ISPUNCT  0x40
                                 92                 :                : #define PG_ISSPACE  0x80
                                 93                 :                : 
                                 94                 :                : static const unsigned char pg_char_properties[128] = {
                                 95                 :                :      /* NUL */ 0,
                                 96                 :                :      /* ^A */ 0,
                                 97                 :                :      /* ^B */ 0,
                                 98                 :                :      /* ^C */ 0,
                                 99                 :                :      /* ^D */ 0,
                                100                 :                :      /* ^E */ 0,
                                101                 :                :      /* ^F */ 0,
                                102                 :                :      /* ^G */ 0,
                                103                 :                :      /* ^H */ 0,
                                104                 :                :      /* ^I */ PG_ISSPACE,
                                105                 :                :      /* ^J */ PG_ISSPACE,
                                106                 :                :      /* ^K */ PG_ISSPACE,
                                107                 :                :      /* ^L */ PG_ISSPACE,
                                108                 :                :      /* ^M */ PG_ISSPACE,
                                109                 :                :      /* ^N */ 0,
                                110                 :                :      /* ^O */ 0,
                                111                 :                :      /* ^P */ 0,
                                112                 :                :      /* ^Q */ 0,
                                113                 :                :      /* ^R */ 0,
                                114                 :                :      /* ^S */ 0,
                                115                 :                :      /* ^T */ 0,
                                116                 :                :      /* ^U */ 0,
                                117                 :                :      /* ^V */ 0,
                                118                 :                :      /* ^W */ 0,
                                119                 :                :      /* ^X */ 0,
                                120                 :                :      /* ^Y */ 0,
                                121                 :                :      /* ^Z */ 0,
                                122                 :                :      /* ^[ */ 0,
                                123                 :                :      /* ^\ */ 0,
                                124                 :                :      /* ^] */ 0,
                                125                 :                :      /* ^^ */ 0,
                                126                 :                :      /* ^_ */ 0,
                                127                 :                :      /* */ PG_ISPRINT | PG_ISSPACE,
                                128                 :                :      /* !  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                129                 :                :      /* "  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                130                 :                :      /* #  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                131                 :                :      /* $  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                132                 :                :      /* %  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                133                 :                :      /* &  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                134                 :                :      /* '  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                135                 :                :      /* (  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                136                 :                :      /* )  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                137                 :                :      /* *  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                138                 :                :      /* +  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                139                 :                :      /* ,  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                140                 :                :      /* -  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                141                 :                :      /* .  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                142                 :                :      /* /  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                143                 :                :      /* 0  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                144                 :                :      /* 1  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                145                 :                :      /* 2  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                146                 :                :      /* 3  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                147                 :                :      /* 4  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                148                 :                :      /* 5  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                149                 :                :      /* 6  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                150                 :                :      /* 7  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                151                 :                :      /* 8  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                152                 :                :      /* 9  */ PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
                                153                 :                :      /* :  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                154                 :                :      /* ;  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                155                 :                :      /* <  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                156                 :                :      /* =  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                157                 :                :      /* >  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                158                 :                :      /* ?  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                159                 :                :      /* @  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                160                 :                :      /* A  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                161                 :                :      /* B  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                162                 :                :      /* C  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                163                 :                :      /* D  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                164                 :                :      /* E  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                165                 :                :      /* F  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                166                 :                :      /* G  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                167                 :                :      /* H  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                168                 :                :      /* I  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                169                 :                :      /* J  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                170                 :                :      /* K  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                171                 :                :      /* L  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                172                 :                :      /* M  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                173                 :                :      /* N  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                174                 :                :      /* O  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                175                 :                :      /* P  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                176                 :                :      /* Q  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                177                 :                :      /* R  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                178                 :                :      /* S  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                179                 :                :      /* T  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                180                 :                :      /* U  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                181                 :                :      /* V  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                182                 :                :      /* W  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                183                 :                :      /* X  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                184                 :                :      /* Y  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                185                 :                :      /* Z  */ PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
                                186                 :                :      /* [  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                187                 :                :      /* \  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                188                 :                :      /* ]  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                189                 :                :      /* ^  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                190                 :                :      /* _  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                191                 :                :      /* `  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                192                 :                :      /* a  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                193                 :                :      /* b  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                194                 :                :      /* c  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                195                 :                :      /* d  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                196                 :                :      /* e  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                197                 :                :      /* f  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                198                 :                :      /* g  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                199                 :                :      /* h  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                200                 :                :      /* i  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                201                 :                :      /* j  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                202                 :                :      /* k  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                203                 :                :      /* l  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                204                 :                :      /* m  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                205                 :                :      /* n  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                206                 :                :      /* o  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                207                 :                :      /* p  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                208                 :                :      /* q  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                209                 :                :      /* r  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                210                 :                :      /* s  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                211                 :                :      /* t  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                212                 :                :      /* u  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                213                 :                :      /* v  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                214                 :                :      /* w  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                215                 :                :      /* x  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                216                 :                :      /* y  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                217                 :                :      /* z  */ PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
                                218                 :                :      /* {  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                219                 :                :      /* |  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                220                 :                :      /* }  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                221                 :                :      /* ~  */ PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
                                222                 :                :      /* DEL */ 0
                                223                 :                : };
                                224                 :                : 
                                225                 :                : 
                                226                 :                : /*
                                227                 :                :  * pg_set_regex_collation: set collation for these functions to obey
                                228                 :                :  *
                                229                 :                :  * This is called when beginning compilation or execution of a regexp.
                                230                 :                :  * Since there's no need for reentrancy of regexp operations, it's okay
                                231                 :                :  * to store the results in static variables.
                                232                 :                :  */
                                233                 :                : void
 4753 tgl@sss.pgh.pa.us         234                 :CBC      961448 : pg_set_regex_collation(Oid collation)
                                235                 :                : {
  815 peter@eisentraut.org      236         [ -  + ]:         961448 :     if (!OidIsValid(collation))
                                237                 :                :     {
                                238                 :                :         /*
                                239                 :                :          * This typically means that the parser could not resolve a conflict
                                240                 :                :          * of implicit collations, so report it that way.
                                241                 :                :          */
  815 peter@eisentraut.org      242         [ #  # ]:UBC           0 :         ereport(ERROR,
                                243                 :                :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
                                244                 :                :                  errmsg("could not determine which collation to use for regular expression"),
                                245                 :                :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
                                246                 :                :     }
                                247                 :                : 
 4753 tgl@sss.pgh.pa.us         248         [ +  + ]:CBC      961448 :     if (lc_ctype_is_c(collation))
                                249                 :                :     {
                                250                 :                :         /* C/POSIX collations use this path regardless of database encoding */
                                251                 :          70746 :         pg_regex_strategy = PG_REGEX_LOCALE_C;
                                252                 :          70746 :         pg_regex_locale = 0;
 4438                           253                 :          70746 :         pg_regex_collation = C_COLLATION_OID;
                                254                 :                :     }
                                255                 :                :     else
                                256                 :                :     {
  815 peter@eisentraut.org      257                 :         890702 :         pg_regex_locale = pg_newlocale_from_collation(collation);
                                258                 :                : 
  416 jdavis@postgresql.or      259         [ +  + ]:         890702 :         if (!pg_locale_deterministic(pg_regex_locale))
 1850 peter@eisentraut.org      260         [ +  - ]:             12 :             ereport(ERROR,
                                261                 :                :                     (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                262                 :                :                      errmsg("nondeterministic collations are not supported for regular expressions")));
                                263                 :                : 
                                264                 :                : #ifdef USE_ICU
 2579 peter_e@gmx.net           265   [ +  +  +  + ]:         890690 :         if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
                                266                 :            471 :             pg_regex_strategy = PG_REGEX_LOCALE_ICU;
                                267                 :                :         else
                                268                 :                : #endif
 4753 tgl@sss.pgh.pa.us         269         [ +  + ]:         890219 :         if (GetDatabaseEncoding() == PG_UTF8)
                                270                 :                :         {
                                271         [ +  + ]:         890217 :             if (pg_regex_locale)
                                272                 :                :             {
   26 jdavis@postgresql.or      273         [ +  - ]:GNC       95024 :                 if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
                                274                 :          95024 :                     pg_regex_strategy = PG_REGEX_BUILTIN;
                                275                 :                :                 else
   26 jdavis@postgresql.or      276                 :UNC           0 :                     pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
                                277                 :                :             }
                                278                 :                :             else
 4753 tgl@sss.pgh.pa.us         279                 :CBC      795193 :                 pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
                                280                 :                :         }
                                281                 :                :         else
                                282                 :                :         {
                                283         [ -  + ]:              2 :             if (pg_regex_locale)
 4753 tgl@sss.pgh.pa.us         284                 :UBC           0 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
                                285                 :                :             else
 4753 tgl@sss.pgh.pa.us         286                 :CBC           2 :                 pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
                                287                 :                :         }
                                288                 :                : 
 4438                           289                 :         890690 :         pg_regex_collation = collation;
                                290                 :                :     }
 4753                           291                 :         961436 : }
                                292                 :                : 
                                293                 :                : static int
                                294                 :          72968 : pg_wc_isdigit(pg_wchar c)
                                295                 :                : {
                                296   [ +  +  +  -  :          72968 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                297                 :                :     {
                                298                 :           1071 :         case PG_REGEX_LOCALE_C:
                                299         [ +  - ]:           2142 :             return (c <= (pg_wchar) 127 &&
                                300         [ +  + ]:           1071 :                     (pg_char_properties[c] & PG_ISDIGIT));
   26 jdavis@postgresql.or      301                 :GNC       22583 :         case PG_REGEX_BUILTIN:
                                302                 :          22583 :             return pg_u_isdigit(c, true);
 4753 tgl@sss.pgh.pa.us         303                 :CBC       43170 :         case PG_REGEX_LOCALE_WIDE:
                                304                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                305                 :          43170 :                 return iswdigit((wint_t) c);
                                306                 :                :             /* FALL THRU */
                                307                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         308         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                309         [ #  # ]:              0 :                     isdigit((unsigned char) c));
                                310                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                311                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           312                 :              0 :                 return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
                                313                 :                :             /* FALL THRU */
                                314                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         315         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           316         [ #  # ]:              0 :                     isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
                                317                 :                :             break;
 2579 peter_e@gmx.net           318                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                319                 :                : #ifdef USE_ICU
                                320                 :           6144 :             return u_isdigit(c);
                                321                 :                : #endif
                                322                 :                :             break;
                                323                 :                :     }
 4753 tgl@sss.pgh.pa.us         324                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                325                 :                : }
                                326                 :                : 
                                327                 :                : static int
 4753 tgl@sss.pgh.pa.us         328                 :CBC        8330 : pg_wc_isalpha(pg_wchar c)
                                329                 :                : {
                                330   [ -  +  +  -  :           8330 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                331                 :                :     {
 4753 tgl@sss.pgh.pa.us         332                 :UBC           0 :         case PG_REGEX_LOCALE_C:
                                333         [ #  # ]:              0 :             return (c <= (pg_wchar) 127 &&
                                334         [ #  # ]:              0 :                     (pg_char_properties[c] & PG_ISALPHA));
   26 jdavis@postgresql.or      335                 :GNC          11 :         case PG_REGEX_BUILTIN:
                                336                 :             11 :             return pg_u_isalpha(c);
 4753 tgl@sss.pgh.pa.us         337                 :CBC        2175 :         case PG_REGEX_LOCALE_WIDE:
                                338                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                339                 :           2175 :                 return iswalpha((wint_t) c);
                                340                 :                :             /* FALL THRU */
                                341                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         342         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                343         [ #  # ]:              0 :                     isalpha((unsigned char) c));
                                344                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                345                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           346                 :              0 :                 return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
                                347                 :                :             /* FALL THRU */
                                348                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         349         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           350         [ #  # ]:              0 :                     isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
                                351                 :                :             break;
 2579 peter_e@gmx.net           352                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                353                 :                : #ifdef USE_ICU
                                354                 :           6144 :             return u_isalpha(c);
                                355                 :                : #endif
                                356                 :                :             break;
                                357                 :                :     }
 4753 tgl@sss.pgh.pa.us         358                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                359                 :                : }
                                360                 :                : 
                                361                 :                : static int
 4753 tgl@sss.pgh.pa.us         362                 :CBC       33158 : pg_wc_isalnum(pg_wchar c)
                                363                 :                : {
                                364   [ +  +  +  -  :          33158 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                365                 :                :     {
                                366                 :            381 :         case PG_REGEX_LOCALE_C:
                                367         [ +  - ]:            762 :             return (c <= (pg_wchar) 127 &&
                                368         [ +  + ]:            381 :                     (pg_char_properties[c] & PG_ISALNUM));
   26 jdavis@postgresql.or      369                 :GNC       10238 :         case PG_REGEX_BUILTIN:
                                370                 :          10238 :             return pg_u_isalnum(c, true);
 4753 tgl@sss.pgh.pa.us         371                 :CBC       16395 :         case PG_REGEX_LOCALE_WIDE:
                                372                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                373                 :          16395 :                 return iswalnum((wint_t) c);
                                374                 :                :             /* FALL THRU */
                                375                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         376         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                377         [ #  # ]:              0 :                     isalnum((unsigned char) c));
                                378                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                379                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           380                 :              0 :                 return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
                                381                 :                :             /* FALL THRU */
                                382                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         383         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           384         [ #  # ]:              0 :                     isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
                                385                 :                :             break;
 2579 peter_e@gmx.net           386                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                387                 :                : #ifdef USE_ICU
                                388                 :           6144 :             return u_isalnum(c);
                                389                 :                : #endif
                                390                 :                :             break;
                                391                 :                :     }
 4753 tgl@sss.pgh.pa.us         392                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                393                 :                : }
                                394                 :                : 
                                395                 :                : static int
 1144 tgl@sss.pgh.pa.us         396                 :CBC       16769 : pg_wc_isword(pg_wchar c)
                                397                 :                : {
                                398                 :                :     /* We define word characters as alnum class plus underscore */
                                399         [ +  + ]:          16769 :     if (c == CHR('_'))
                                400                 :             11 :         return 1;
                                401                 :          16758 :     return pg_wc_isalnum(c);
                                402                 :                : }
                                403                 :                : 
                                404                 :                : static int
 4753                           405                 :          14344 : pg_wc_isupper(pg_wchar c)
                                406                 :                : {
                                407   [ -  +  +  -  :          14344 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                408                 :                :     {
 4753 tgl@sss.pgh.pa.us         409                 :UBC           0 :         case PG_REGEX_LOCALE_C:
                                410         [ #  # ]:              0 :             return (c <= (pg_wchar) 127 &&
                                411         [ #  # ]:              0 :                     (pg_char_properties[c] & PG_ISUPPER));
   26 jdavis@postgresql.or      412                 :GNC        6144 :         case PG_REGEX_BUILTIN:
                                413                 :           6144 :             return pg_u_isupper(c);
 4753 tgl@sss.pgh.pa.us         414                 :CBC        2056 :         case PG_REGEX_LOCALE_WIDE:
                                415                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                416                 :           2056 :                 return iswupper((wint_t) c);
                                417                 :                :             /* FALL THRU */
                                418                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         419         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                420         [ #  # ]:              0 :                     isupper((unsigned char) c));
                                421                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                422                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           423                 :              0 :                 return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
                                424                 :                :             /* FALL THRU */
                                425                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         426         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           427         [ #  # ]:              0 :                     isupper_l((unsigned char) c, pg_regex_locale->info.lt));
                                428                 :                :             break;
 2579 peter_e@gmx.net           429                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                430                 :                : #ifdef USE_ICU
                                431                 :           6144 :             return u_isupper(c);
                                432                 :                : #endif
                                433                 :                :             break;
                                434                 :                :     }
 4753 tgl@sss.pgh.pa.us         435                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                436                 :                : }
                                437                 :                : 
                                438                 :                : static int
 4753 tgl@sss.pgh.pa.us         439                 :CBC        8195 : pg_wc_islower(pg_wchar c)
                                440                 :                : {
                                441   [ -  -  +  -  :           8195 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                442                 :                :     {
 4753 tgl@sss.pgh.pa.us         443                 :UBC           0 :         case PG_REGEX_LOCALE_C:
                                444         [ #  # ]:              0 :             return (c <= (pg_wchar) 127 &&
                                445         [ #  # ]:              0 :                     (pg_char_properties[c] & PG_ISLOWER));
   26 jdavis@postgresql.or      446                 :UNC           0 :         case PG_REGEX_BUILTIN:
                                447                 :              0 :             return pg_u_islower(c);
 4753 tgl@sss.pgh.pa.us         448                 :CBC        2051 :         case PG_REGEX_LOCALE_WIDE:
                                449                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                450                 :           2051 :                 return iswlower((wint_t) c);
                                451                 :                :             /* FALL THRU */
                                452                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         453         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                454         [ #  # ]:              0 :                     islower((unsigned char) c));
                                455                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                456                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           457                 :              0 :                 return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
                                458                 :                :             /* FALL THRU */
                                459                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         460         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           461         [ #  # ]:              0 :                     islower_l((unsigned char) c, pg_regex_locale->info.lt));
                                462                 :                :             break;
 2579 peter_e@gmx.net           463                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                464                 :                : #ifdef USE_ICU
                                465                 :           6144 :             return u_islower(c);
                                466                 :                : #endif
                                467                 :                :             break;
                                468                 :                :     }
 4753 tgl@sss.pgh.pa.us         469                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                470                 :                : }
                                471                 :                : 
                                472                 :                : static int
 4753 tgl@sss.pgh.pa.us         473                 :CBC        8195 : pg_wc_isgraph(pg_wchar c)
                                474                 :                : {
                                475   [ -  -  +  -  :           8195 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                476                 :                :     {
 4753 tgl@sss.pgh.pa.us         477                 :UBC           0 :         case PG_REGEX_LOCALE_C:
                                478         [ #  # ]:              0 :             return (c <= (pg_wchar) 127 &&
                                479         [ #  # ]:              0 :                     (pg_char_properties[c] & PG_ISGRAPH));
   26 jdavis@postgresql.or      480                 :UNC           0 :         case PG_REGEX_BUILTIN:
                                481                 :              0 :             return pg_u_isgraph(c);
 4753 tgl@sss.pgh.pa.us         482                 :CBC        2051 :         case PG_REGEX_LOCALE_WIDE:
                                483                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                484                 :           2051 :                 return iswgraph((wint_t) c);
                                485                 :                :             /* FALL THRU */
                                486                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         487         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                488         [ #  # ]:              0 :                     isgraph((unsigned char) c));
                                489                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                490                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           491                 :              0 :                 return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
                                492                 :                :             /* FALL THRU */
                                493                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         494         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           495         [ #  # ]:              0 :                     isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
                                496                 :                :             break;
 2579 peter_e@gmx.net           497                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                498                 :                : #ifdef USE_ICU
                                499                 :           6144 :             return u_isgraph(c);
                                500                 :                : #endif
                                501                 :                :             break;
                                502                 :                :     }
 4753 tgl@sss.pgh.pa.us         503                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                504                 :                : }
                                505                 :                : 
                                506                 :                : static int
 4753 tgl@sss.pgh.pa.us         507                 :CBC        8195 : pg_wc_isprint(pg_wchar c)
                                508                 :                : {
                                509   [ -  -  +  -  :           8195 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                510                 :                :     {
 4753 tgl@sss.pgh.pa.us         511                 :UBC           0 :         case PG_REGEX_LOCALE_C:
                                512         [ #  # ]:              0 :             return (c <= (pg_wchar) 127 &&
                                513         [ #  # ]:              0 :                     (pg_char_properties[c] & PG_ISPRINT));
   26 jdavis@postgresql.or      514                 :UNC           0 :         case PG_REGEX_BUILTIN:
                                515                 :              0 :             return pg_u_isprint(c);
 4753 tgl@sss.pgh.pa.us         516                 :CBC        2051 :         case PG_REGEX_LOCALE_WIDE:
                                517                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                518                 :           2051 :                 return iswprint((wint_t) c);
                                519                 :                :             /* FALL THRU */
                                520                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         521         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                522         [ #  # ]:              0 :                     isprint((unsigned char) c));
                                523                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                524                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           525                 :              0 :                 return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
                                526                 :                :             /* FALL THRU */
                                527                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         528         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           529         [ #  # ]:              0 :                     isprint_l((unsigned char) c, pg_regex_locale->info.lt));
                                530                 :                :             break;
 2579 peter_e@gmx.net           531                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                532                 :                : #ifdef USE_ICU
                                533                 :           6144 :             return u_isprint(c);
                                534                 :                : #endif
                                535                 :                :             break;
                                536                 :                :     }
 4753 tgl@sss.pgh.pa.us         537                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                538                 :                : }
                                539                 :                : 
                                540                 :                : static int
 4753 tgl@sss.pgh.pa.us         541                 :CBC       14339 : pg_wc_ispunct(pg_wchar c)
                                542                 :                : {
                                543   [ -  +  +  -  :          14339 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                544                 :                :     {
 4753 tgl@sss.pgh.pa.us         545                 :UBC           0 :         case PG_REGEX_LOCALE_C:
                                546         [ #  # ]:              0 :             return (c <= (pg_wchar) 127 &&
                                547         [ #  # ]:              0 :                     (pg_char_properties[c] & PG_ISPUNCT));
   26 jdavis@postgresql.or      548                 :GNC        6144 :         case PG_REGEX_BUILTIN:
                                549                 :           6144 :             return pg_u_ispunct(c, true);
 4753 tgl@sss.pgh.pa.us         550                 :CBC        2051 :         case PG_REGEX_LOCALE_WIDE:
                                551                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                552                 :           2051 :                 return iswpunct((wint_t) c);
                                553                 :                :             /* FALL THRU */
                                554                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         555         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                556         [ #  # ]:              0 :                     ispunct((unsigned char) c));
                                557                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                558                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           559                 :              0 :                 return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
                                560                 :                :             /* FALL THRU */
                                561                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         562         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           563         [ #  # ]:              0 :                     ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
                                564                 :                :             break;
 2579 peter_e@gmx.net           565                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                566                 :                : #ifdef USE_ICU
                                567                 :           6144 :             return u_ispunct(c);
                                568                 :                : #endif
                                569                 :                :             break;
                                570                 :                :     }
 4753 tgl@sss.pgh.pa.us         571                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                572                 :                : }
                                573                 :                : 
                                574                 :                : static int
 4753 tgl@sss.pgh.pa.us         575                 :CBC       38193 : pg_wc_isspace(pg_wchar c)
                                576                 :                : {
                                577   [ -  +  +  -  :          38193 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                578                 :                :     {
 4753 tgl@sss.pgh.pa.us         579                 :UBC           0 :         case PG_REGEX_LOCALE_C:
                                580         [ #  # ]:              0 :             return (c <= (pg_wchar) 127 &&
                                581         [ #  # ]:              0 :                     (pg_char_properties[c] & PG_ISSPACE));
   26 jdavis@postgresql.or      582                 :GNC        8199 :         case PG_REGEX_BUILTIN:
                                583                 :           8199 :             return pg_u_isspace(c);
 4753 tgl@sss.pgh.pa.us         584                 :CBC       23850 :         case PG_REGEX_LOCALE_WIDE:
                                585                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                586                 :          23850 :                 return iswspace((wint_t) c);
                                587                 :                :             /* FALL THRU */
                                588                 :                :         case PG_REGEX_LOCALE_1BYTE:
 4753 tgl@sss.pgh.pa.us         589         [ #  # ]:UBC           0 :             return (c <= (pg_wchar) UCHAR_MAX &&
                                590         [ #  # ]:              0 :                     isspace((unsigned char) c));
                                591                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                592                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           593                 :              0 :                 return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
                                594                 :                :             /* FALL THRU */
                                595                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         596         [ #  # ]:              0 :             return (c <= (pg_wchar) UCHAR_MAX &&
 2579 peter_e@gmx.net           597         [ #  # ]:              0 :                     isspace_l((unsigned char) c, pg_regex_locale->info.lt));
                                598                 :                :             break;
 2579 peter_e@gmx.net           599                 :CBC        6144 :         case PG_REGEX_LOCALE_ICU:
                                600                 :                : #ifdef USE_ICU
                                601                 :           6144 :             return u_isspace(c);
                                602                 :                : #endif
                                603                 :                :             break;
                                604                 :                :     }
 4753 tgl@sss.pgh.pa.us         605                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                606                 :                : }
                                607                 :                : 
                                608                 :                : static pg_wchar
 4753 tgl@sss.pgh.pa.us         609                 :CBC        5273 : pg_wc_toupper(pg_wchar c)
                                610                 :                : {
                                611   [ +  +  +  -  :           5273 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                612                 :                :     {
                                613                 :            528 :         case PG_REGEX_LOCALE_C:
                                614         [ +  - ]:            528 :             if (c <= (pg_wchar) 127)
                                615                 :            528 :                 return pg_ascii_toupper((unsigned char) c);
 4753 tgl@sss.pgh.pa.us         616                 :UBC           0 :             return c;
   26 jdavis@postgresql.or      617                 :GNC         186 :         case PG_REGEX_BUILTIN:
                                618                 :            186 :             return unicode_uppercase_simple(c);
 4753 tgl@sss.pgh.pa.us         619                 :CBC        4505 :         case PG_REGEX_LOCALE_WIDE:
                                620                 :                :             /* force C behavior for ASCII characters, per comments above */
                                621         [ +  + ]:           4505 :             if (c <= (pg_wchar) 127)
                                622                 :            407 :                 return pg_ascii_toupper((unsigned char) c);
                                623                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                624                 :           4098 :                 return towupper((wint_t) c);
                                625                 :                :             /* FALL THRU */
                                626                 :                :         case PG_REGEX_LOCALE_1BYTE:
                                627                 :                :             /* force C behavior for ASCII characters, per comments above */
 4753 tgl@sss.pgh.pa.us         628         [ #  # ]:UBC           0 :             if (c <= (pg_wchar) 127)
                                629                 :              0 :                 return pg_ascii_toupper((unsigned char) c);
                                630         [ #  # ]:              0 :             if (c <= (pg_wchar) UCHAR_MAX)
                                631                 :              0 :                 return toupper((unsigned char) c);
                                632                 :              0 :             return c;
                                633                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                634                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           635                 :              0 :                 return towupper_l((wint_t) c, pg_regex_locale->info.lt);
                                636                 :                :             /* FALL THRU */
                                637                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         638         [ #  # ]:              0 :             if (c <= (pg_wchar) UCHAR_MAX)
 2579 peter_e@gmx.net           639                 :              0 :                 return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
 4753 tgl@sss.pgh.pa.us         640                 :              0 :             return c;
 2579 peter_e@gmx.net           641                 :CBC          54 :         case PG_REGEX_LOCALE_ICU:
                                642                 :                : #ifdef USE_ICU
                                643                 :             54 :             return u_toupper(c);
                                644                 :                : #endif
                                645                 :                :             break;
                                646                 :                :     }
 4753 tgl@sss.pgh.pa.us         647                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                648                 :                : }
                                649                 :                : 
                                650                 :                : static pg_wchar
 4753 tgl@sss.pgh.pa.us         651                 :CBC        5275 : pg_wc_tolower(pg_wchar c)
                                652                 :                : {
                                653   [ +  +  +  -  :           5275 :     switch (pg_regex_strategy)
                                        -  -  +  - ]
                                654                 :                :     {
                                655                 :            528 :         case PG_REGEX_LOCALE_C:
                                656         [ +  - ]:            528 :             if (c <= (pg_wchar) 127)
                                657                 :            528 :                 return pg_ascii_tolower((unsigned char) c);
 4753 tgl@sss.pgh.pa.us         658                 :UBC           0 :             return c;
   26 jdavis@postgresql.or      659                 :GNC         186 :         case PG_REGEX_BUILTIN:
                                660                 :            186 :             return unicode_lowercase_simple(c);
 4753 tgl@sss.pgh.pa.us         661                 :CBC        4507 :         case PG_REGEX_LOCALE_WIDE:
                                662                 :                :             /* force C behavior for ASCII characters, per comments above */
                                663         [ +  + ]:           4507 :             if (c <= (pg_wchar) 127)
                                664                 :            409 :                 return pg_ascii_tolower((unsigned char) c);
                                665                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
                                666                 :           4098 :                 return towlower((wint_t) c);
                                667                 :                :             /* FALL THRU */
                                668                 :                :         case PG_REGEX_LOCALE_1BYTE:
                                669                 :                :             /* force C behavior for ASCII characters, per comments above */
 4753 tgl@sss.pgh.pa.us         670         [ #  # ]:UBC           0 :             if (c <= (pg_wchar) 127)
                                671                 :              0 :                 return pg_ascii_tolower((unsigned char) c);
                                672         [ #  # ]:              0 :             if (c <= (pg_wchar) UCHAR_MAX)
                                673                 :              0 :                 return tolower((unsigned char) c);
                                674                 :              0 :             return c;
                                675                 :              0 :         case PG_REGEX_LOCALE_WIDE_L:
                                676                 :                :             if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 2579 peter_e@gmx.net           677                 :              0 :                 return towlower_l((wint_t) c, pg_regex_locale->info.lt);
                                678                 :                :             /* FALL THRU */
                                679                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
 4753 tgl@sss.pgh.pa.us         680         [ #  # ]:              0 :             if (c <= (pg_wchar) UCHAR_MAX)
 2579 peter_e@gmx.net           681                 :              0 :                 return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
 4753 tgl@sss.pgh.pa.us         682                 :              0 :             return c;
 2579 peter_e@gmx.net           683                 :CBC          54 :         case PG_REGEX_LOCALE_ICU:
                                684                 :                : #ifdef USE_ICU
                                685                 :             54 :             return u_tolower(c);
                                686                 :                : #endif
                                687                 :                :             break;
                                688                 :                :     }
 4753 tgl@sss.pgh.pa.us         689                 :UBC           0 :     return 0;                   /* can't get here, but keep compiler quiet */
                                690                 :                : }
                                691                 :                : 
                                692                 :                : 
                                693                 :                : /*
                                694                 :                :  * These functions cache the results of probing libc's ctype behavior for
                                695                 :                :  * all character codes of interest in a given encoding/collation.  The
                                696                 :                :  * result is provided as a "struct cvec", but notice that the representation
                                697                 :                :  * is a touch different from a cvec created by regc_cvec.c: we allocate the
                                698                 :                :  * chrs[] and ranges[] arrays separately from the struct so that we can
                                699                 :                :  * realloc them larger at need.  This is okay since the cvecs made here
                                700                 :                :  * should never be freed by freecvec().
                                701                 :                :  *
                                702                 :                :  * We use malloc not palloc since we mustn't lose control on out-of-memory;
                                703                 :                :  * the main regex code expects us to return a failure indication instead.
                                704                 :                :  */
                                705                 :                : 
                                706                 :                : typedef int (*pg_wc_probefunc) (pg_wchar c);
                                707                 :                : 
                                708                 :                : typedef struct pg_ctype_cache
                                709                 :                : {
                                710                 :                :     pg_wc_probefunc probefunc;  /* pg_wc_isalpha or a sibling */
                                711                 :                :     Oid         collation;      /* collation this entry is for */
                                712                 :                :     struct cvec cv;             /* cache entry contents */
                                713                 :                :     struct pg_ctype_cache *next;    /* chain link */
                                714                 :                : } pg_ctype_cache;
                                715                 :                : 
                                716                 :                : static pg_ctype_cache *pg_ctype_cache_list = NULL;
                                717                 :                : 
                                718                 :                : /*
                                719                 :                :  * Add a chr or range to pcc->cv; return false if run out of memory
                                720                 :                :  */
                                721                 :                : static bool
 4438 tgl@sss.pgh.pa.us         722                 :CBC        4480 : store_match(pg_ctype_cache *pcc, pg_wchar chr1, int nchrs)
                                723                 :                : {
                                724                 :                :     chr        *newchrs;
                                725                 :                : 
                                726         [ +  + ]:           4480 :     if (nchrs > 1)
                                727                 :                :     {
                                728         [ -  + ]:           1361 :         if (pcc->cv.nranges >= pcc->cv.rangespace)
                                729                 :                :         {
 4438 tgl@sss.pgh.pa.us         730                 :UBC           0 :             pcc->cv.rangespace *= 2;
                                731                 :              0 :             newchrs = (chr *) realloc(pcc->cv.ranges,
                                732                 :              0 :                                       pcc->cv.rangespace * sizeof(chr) * 2);
                                733         [ #  # ]:              0 :             if (newchrs == NULL)
                                734                 :              0 :                 return false;
                                735                 :              0 :             pcc->cv.ranges = newchrs;
                                736                 :                :         }
 4438 tgl@sss.pgh.pa.us         737                 :CBC        1361 :         pcc->cv.ranges[pcc->cv.nranges * 2] = chr1;
                                738                 :           1361 :         pcc->cv.ranges[pcc->cv.nranges * 2 + 1] = chr1 + nchrs - 1;
                                739                 :           1361 :         pcc->cv.nranges++;
                                740                 :                :     }
                                741                 :                :     else
                                742                 :                :     {
                                743         [ -  + ]:           3119 :         assert(nchrs == 1);
                                744         [ +  + ]:           3119 :         if (pcc->cv.nchrs >= pcc->cv.chrspace)
                                745                 :                :         {
                                746                 :             11 :             pcc->cv.chrspace *= 2;
                                747                 :             11 :             newchrs = (chr *) realloc(pcc->cv.chrs,
                                748                 :             11 :                                       pcc->cv.chrspace * sizeof(chr));
                                749         [ -  + ]:             11 :             if (newchrs == NULL)
 4438 tgl@sss.pgh.pa.us         750                 :UBC           0 :                 return false;
 4438 tgl@sss.pgh.pa.us         751                 :CBC          11 :             pcc->cv.chrs = newchrs;
                                752                 :                :         }
                                753                 :           3119 :         pcc->cv.chrs[pcc->cv.nchrs++] = chr1;
                                754                 :                :     }
                                755                 :           4480 :     return true;
                                756                 :                : }
                                757                 :                : 
                                758                 :                : /*
                                759                 :                :  * Given a probe function (e.g., pg_wc_isalpha) get a struct cvec for all
                                760                 :                :  * chrs satisfying the probe function.  The active collation is the one
                                761                 :                :  * previously set by pg_set_regex_collation.  Return NULL if out of memory.
                                762                 :                :  *
                                763                 :                :  * Note that the result must not be freed or modified by caller.
                                764                 :                :  */
                                765                 :                : static struct cvec *
 2778                           766                 :            351 : pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
                                767                 :                : {
                                768                 :                :     pg_ctype_cache *pcc;
                                769                 :                :     pg_wchar    max_chr;
                                770                 :                :     pg_wchar    cur_chr;
                                771                 :                :     int         nmatches;
                                772                 :                :     chr        *newchrs;
                                773                 :                : 
                                774                 :                :     /*
                                775                 :                :      * Do we already have the answer cached?
                                776                 :                :      */
 4438                           777         [ +  + ]:            817 :     for (pcc = pg_ctype_cache_list; pcc != NULL; pcc = pcc->next)
                                778                 :                :     {
                                779         [ +  + ]:            707 :         if (pcc->probefunc == probefunc &&
                                780         [ +  + ]:            265 :             pcc->collation == pg_regex_collation)
                                781                 :            241 :             return &pcc->cv;
                                782                 :                :     }
                                783                 :                : 
                                784                 :                :     /*
                                785                 :                :      * Nope, so initialize some workspace ...
                                786                 :                :      */
                                787                 :            110 :     pcc = (pg_ctype_cache *) malloc(sizeof(pg_ctype_cache));
                                788         [ -  + ]:            110 :     if (pcc == NULL)
 4438 tgl@sss.pgh.pa.us         789                 :UBC           0 :         return NULL;
 4438 tgl@sss.pgh.pa.us         790                 :CBC         110 :     pcc->probefunc = probefunc;
                                791                 :            110 :     pcc->collation = pg_regex_collation;
                                792                 :            110 :     pcc->cv.nchrs = 0;
                                793                 :            110 :     pcc->cv.chrspace = 128;
                                794                 :            110 :     pcc->cv.chrs = (chr *) malloc(pcc->cv.chrspace * sizeof(chr));
                                795                 :            110 :     pcc->cv.nranges = 0;
                                796                 :            110 :     pcc->cv.rangespace = 64;
                                797                 :            110 :     pcc->cv.ranges = (chr *) malloc(pcc->cv.rangespace * sizeof(chr) * 2);
                                798   [ +  -  -  + ]:            110 :     if (pcc->cv.chrs == NULL || pcc->cv.ranges == NULL)
 4438 tgl@sss.pgh.pa.us         799                 :UBC           0 :         goto out_of_memory;
 2778 tgl@sss.pgh.pa.us         800                 :CBC         110 :     pcc->cv.cclasscode = cclasscode;
                                801                 :                : 
                                802                 :                :     /*
                                803                 :                :      * Decide how many character codes we ought to look through.  In general
                                804                 :                :      * we don't go past MAX_SIMPLE_CHR; chr codes above that are handled at
                                805                 :                :      * runtime using the "high colormap" mechanism.  However, in C locale
                                806                 :                :      * there's no need to go further than 127, and if we only have a 1-byte
                                807                 :                :      * <ctype.h> API there's no need to go further than that can handle.
                                808                 :                :      *
                                809                 :                :      * If it's not MAX_SIMPLE_CHR that's constraining the search, mark the
                                810                 :                :      * output cvec as not having any locale-dependent behavior, since there
                                811                 :                :      * will be no need to do any run-time locale checks.  (The #if's here
                                812                 :                :      * would always be true for production values of MAX_SIMPLE_CHR, but it's
                                813                 :                :      * useful to allow it to be small for testing purposes.)
                                814                 :                :      */
 4438                           815   [ +  +  +  -  :            110 :     switch (pg_regex_strategy)
                                              +  - ]
                                816                 :                :     {
                                817                 :             11 :         case PG_REGEX_LOCALE_C:
                                818                 :                : #if MAX_SIMPLE_CHR >= 127
                                819                 :             11 :             max_chr = (pg_wchar) 127;
 2778                           820                 :             11 :             pcc->cv.cclasscode = -1;
                                821                 :                : #else
                                822                 :                :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
                                823                 :                : #endif
 4438                           824                 :             11 :             break;
   26 jdavis@postgresql.or      825                 :GNC          26 :         case PG_REGEX_BUILTIN:
                                826                 :             26 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
                                827                 :             26 :             break;
 4438 tgl@sss.pgh.pa.us         828                 :CBC          46 :         case PG_REGEX_LOCALE_WIDE:
                                829                 :                :         case PG_REGEX_LOCALE_WIDE_L:
 2778                           830                 :             46 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 4438                           831                 :             46 :             break;
 4438 tgl@sss.pgh.pa.us         832                 :UBC           0 :         case PG_REGEX_LOCALE_1BYTE:
                                833                 :                :         case PG_REGEX_LOCALE_1BYTE_L:
                                834                 :                : #if MAX_SIMPLE_CHR >= UCHAR_MAX
                                835                 :              0 :             max_chr = (pg_wchar) UCHAR_MAX;
 2778                           836                 :              0 :             pcc->cv.cclasscode = -1;
                                837                 :                : #else
                                838                 :                :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
                                839                 :                : #endif
 4438                           840                 :              0 :             break;
 2579 peter_e@gmx.net           841                 :CBC          27 :         case PG_REGEX_LOCALE_ICU:
                                842                 :             27 :             max_chr = (pg_wchar) MAX_SIMPLE_CHR;
                                843                 :             27 :             break;
 4438 tgl@sss.pgh.pa.us         844                 :UBC           0 :         default:
   26 jdavis@postgresql.or      845                 :UNC           0 :             Assert(false);
 4438 tgl@sss.pgh.pa.us         846                 :EUB             :             max_chr = 0;        /* can't get here, but keep compiler quiet */
                                847                 :                :             break;
                                848                 :                :     }
                                849                 :                : 
                                850                 :                :     /*
                                851                 :                :      * And scan 'em ...
                                852                 :                :      */
 4438 tgl@sss.pgh.pa.us         853                 :CBC         110 :     nmatches = 0;               /* number of consecutive matches */
                                854                 :                : 
                                855         [ +  + ]:         204270 :     for (cur_chr = 0; cur_chr <= max_chr; cur_chr++)
                                856                 :                :     {
                                857         [ +  + ]:         204160 :         if ((*probefunc) (cur_chr))
                                858                 :          53351 :             nmatches++;
                                859         [ +  + ]:         150809 :         else if (nmatches > 0)
                                860                 :                :         {
                                861         [ -  + ]:           4468 :             if (!store_match(pcc, cur_chr - nmatches, nmatches))
 4438 tgl@sss.pgh.pa.us         862                 :UBC           0 :                 goto out_of_memory;
 4438 tgl@sss.pgh.pa.us         863                 :CBC        4468 :             nmatches = 0;
                                864                 :                :         }
                                865                 :                :     }
                                866                 :                : 
                                867         [ +  + ]:            110 :     if (nmatches > 0)
                                868         [ -  + ]:             12 :         if (!store_match(pcc, cur_chr - nmatches, nmatches))
 4438 tgl@sss.pgh.pa.us         869                 :UBC           0 :             goto out_of_memory;
                                870                 :                : 
                                871                 :                :     /*
                                872                 :                :      * We might have allocated more memory than needed, if so free it
                                873                 :                :      */
 4438 tgl@sss.pgh.pa.us         874         [ +  + ]:CBC         110 :     if (pcc->cv.nchrs == 0)
                                875                 :                :     {
                                876                 :             43 :         free(pcc->cv.chrs);
                                877                 :             43 :         pcc->cv.chrs = NULL;
                                878                 :             43 :         pcc->cv.chrspace = 0;
                                879                 :                :     }
                                880         [ +  - ]:             67 :     else if (pcc->cv.nchrs < pcc->cv.chrspace)
                                881                 :                :     {
                                882                 :             67 :         newchrs = (chr *) realloc(pcc->cv.chrs,
                                883                 :             67 :                                   pcc->cv.nchrs * sizeof(chr));
                                884         [ -  + ]:             67 :         if (newchrs == NULL)
 4438 tgl@sss.pgh.pa.us         885                 :UBC           0 :             goto out_of_memory;
 4438 tgl@sss.pgh.pa.us         886                 :CBC          67 :         pcc->cv.chrs = newchrs;
                                887                 :             67 :         pcc->cv.chrspace = pcc->cv.nchrs;
                                888                 :                :     }
                                889         [ -  + ]:            110 :     if (pcc->cv.nranges == 0)
                                890                 :                :     {
 4438 tgl@sss.pgh.pa.us         891                 :UBC           0 :         free(pcc->cv.ranges);
                                892                 :              0 :         pcc->cv.ranges = NULL;
                                893                 :              0 :         pcc->cv.rangespace = 0;
                                894                 :                :     }
 4438 tgl@sss.pgh.pa.us         895         [ +  - ]:CBC         110 :     else if (pcc->cv.nranges < pcc->cv.rangespace)
                                896                 :                :     {
                                897                 :            110 :         newchrs = (chr *) realloc(pcc->cv.ranges,
                                898                 :            110 :                                   pcc->cv.nranges * sizeof(chr) * 2);
                                899         [ -  + ]:            110 :         if (newchrs == NULL)
 4438 tgl@sss.pgh.pa.us         900                 :UBC           0 :             goto out_of_memory;
 4438 tgl@sss.pgh.pa.us         901                 :CBC         110 :         pcc->cv.ranges = newchrs;
                                902                 :            110 :         pcc->cv.rangespace = pcc->cv.nranges;
                                903                 :                :     }
                                904                 :                : 
                                905                 :                :     /*
                                906                 :                :      * Success, link it into cache chain
                                907                 :                :      */
                                908                 :            110 :     pcc->next = pg_ctype_cache_list;
                                909                 :            110 :     pg_ctype_cache_list = pcc;
                                910                 :                : 
                                911                 :            110 :     return &pcc->cv;
                                912                 :                : 
                                913                 :                :     /*
                                914                 :                :      * Failure, clean up
                                915                 :                :      */
 4438 tgl@sss.pgh.pa.us         916                 :UBC           0 : out_of_memory:
  668 peter@eisentraut.org      917                 :              0 :     free(pcc->cv.chrs);
                                918                 :              0 :     free(pcc->cv.ranges);
 4438 tgl@sss.pgh.pa.us         919                 :              0 :     free(pcc);
                                920                 :                : 
                                921                 :              0 :     return NULL;
                                922                 :                : }
        

Generated by: LCOV version 2.1-beta2-3-g6141622