LCOV - differential code coverage report
Current view: top level - src/backend/regex - regc_locale.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 92.1 % 177 163 4 4 6 6 49 1 107 2 50 2
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 10 10 8 1 1 8
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*
       2                 :  * regc_locale.c --
       3                 :  *
       4                 :  *  This file contains locale-specific regexp routines.
       5                 :  *  This file is #included by regcomp.c.
       6                 :  *
       7                 :  * Copyright (c) 1998 by Scriptics Corporation.
       8                 :  *
       9                 :  * This software is copyrighted by the Regents of the University of
      10                 :  * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
      11                 :  * Corporation and other parties.  The following terms apply to all files
      12                 :  * associated with the software unless explicitly disclaimed in
      13                 :  * individual files.
      14                 :  *
      15                 :  * The authors hereby grant permission to use, copy, modify, distribute,
      16                 :  * and license this software and its documentation for any purpose, provided
      17                 :  * that existing copyright notices are retained in all copies and that this
      18                 :  * notice is included verbatim in any distributions. No written agreement,
      19                 :  * license, or royalty fee is required for any of the authorized uses.
      20                 :  * Modifications to this software may be copyrighted by their authors
      21                 :  * and need not follow the licensing terms described here, provided that
      22                 :  * the new terms are clearly indicated on the first page of each file where
      23                 :  * they apply.
      24                 :  *
      25                 :  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
      26                 :  * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
      27                 :  * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
      28                 :  * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
      29                 :  * POSSIBILITY OF SUCH DAMAGE.
      30                 :  *
      31                 :  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
      32                 :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
      33                 :  * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
      34                 :  * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
      35                 :  * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
      36                 :  * MODIFICATIONS.
      37                 :  *
      38                 :  * GOVERNMENT USE: If you are acquiring this software on behalf of the
      39                 :  * U.S. government, the Government shall have only "Restricted Rights"
      40                 :  * in the software and related documentation as defined in the Federal
      41                 :  * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
      42                 :  * are acquiring the software on behalf of the Department of Defense, the
      43                 :  * software shall be classified as "Commercial Computer Software" and the
      44                 :  * Government shall have only "Restricted Rights" as defined in Clause
      45                 :  * 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
      46                 :  * authors grant the U.S. Government and others acting in its behalf
      47                 :  * permission to use and distribute the software in accordance with the
      48                 :  * terms specified in this license.
      49                 :  *
      50                 :  * src/backend/regex/regc_locale.c
      51                 :  */
      52                 : 
      53                 : /* ASCII character-name table */
      54                 : 
      55                 : static const struct cname
      56                 : {
      57                 :     const char *name;
      58                 :     const char  code;
      59                 : }           cnames[] =
      60                 : 
      61                 : {
      62                 :     {
      63                 :         "NUL", '\0'
      64                 :     },
      65                 :     {
      66                 :         "SOH", '\001'
      67                 :     },
      68                 :     {
      69                 :         "STX", '\002'
      70                 :     },
      71                 :     {
      72                 :         "ETX", '\003'
      73                 :     },
      74                 :     {
      75                 :         "EOT", '\004'
      76                 :     },
      77                 :     {
      78                 :         "ENQ", '\005'
      79                 :     },
      80                 :     {
      81                 :         "ACK", '\006'
      82                 :     },
      83                 :     {
      84                 :         "BEL", '\007'
      85                 :     },
      86                 :     {
      87                 :         "alert", '\007'
      88                 :     },
      89                 :     {
      90                 :         "BS", '\010'
      91                 :     },
      92                 :     {
      93                 :         "backspace", '\b'
      94                 :     },
      95                 :     {
      96                 :         "HT", '\011'
      97                 :     },
      98                 :     {
      99                 :         "tab", '\t'
     100                 :     },
     101                 :     {
     102                 :         "LF", '\012'
     103                 :     },
     104                 :     {
     105                 :         "newline", '\n'
     106                 :     },
     107                 :     {
     108                 :         "VT", '\013'
     109                 :     },
     110                 :     {
     111                 :         "vertical-tab", '\v'
     112                 :     },
     113                 :     {
     114                 :         "FF", '\014'
     115                 :     },
     116                 :     {
     117                 :         "form-feed", '\f'
     118                 :     },
     119                 :     {
     120                 :         "CR", '\015'
     121                 :     },
     122                 :     {
     123                 :         "carriage-return", '\r'
     124                 :     },
     125                 :     {
     126                 :         "SO", '\016'
     127                 :     },
     128                 :     {
     129                 :         "SI", '\017'
     130                 :     },
     131                 :     {
     132                 :         "DLE", '\020'
     133                 :     },
     134                 :     {
     135                 :         "DC1", '\021'
     136                 :     },
     137                 :     {
     138                 :         "DC2", '\022'
     139                 :     },
     140                 :     {
     141                 :         "DC3", '\023'
     142                 :     },
     143                 :     {
     144                 :         "DC4", '\024'
     145                 :     },
     146                 :     {
     147                 :         "NAK", '\025'
     148                 :     },
     149                 :     {
     150                 :         "SYN", '\026'
     151                 :     },
     152                 :     {
     153                 :         "ETB", '\027'
     154                 :     },
     155                 :     {
     156                 :         "CAN", '\030'
     157                 :     },
     158                 :     {
     159                 :         "EM", '\031'
     160                 :     },
     161                 :     {
     162                 :         "SUB", '\032'
     163                 :     },
     164                 :     {
     165                 :         "ESC", '\033'
     166                 :     },
     167                 :     {
     168                 :         "IS4", '\034'
     169                 :     },
     170                 :     {
     171                 :         "FS", '\034'
     172                 :     },
     173                 :     {
     174                 :         "IS3", '\035'
     175                 :     },
     176                 :     {
     177                 :         "GS", '\035'
     178                 :     },
     179                 :     {
     180                 :         "IS2", '\036'
     181                 :     },
     182                 :     {
     183                 :         "RS", '\036'
     184                 :     },
     185                 :     {
     186                 :         "IS1", '\037'
     187                 :     },
     188                 :     {
     189                 :         "US", '\037'
     190                 :     },
     191                 :     {
     192                 :         "space", ' '
     193                 :     },
     194                 :     {
     195                 :         "exclamation-mark", '!'
     196                 :     },
     197                 :     {
     198                 :         "quotation-mark", '"'
     199                 :     },
     200                 :     {
     201                 :         "number-sign", '#'
     202                 :     },
     203                 :     {
     204                 :         "dollar-sign", '$'
     205                 :     },
     206                 :     {
     207                 :         "percent-sign", '%'
     208                 :     },
     209                 :     {
     210                 :         "ampersand", '&'
     211                 :     },
     212                 :     {
     213                 :         "apostrophe", '\''
     214                 :     },
     215                 :     {
     216                 :         "left-parenthesis", '('
     217                 :     },
     218                 :     {
     219                 :         "right-parenthesis", ')'
     220                 :     },
     221                 :     {
     222                 :         "asterisk", '*'
     223                 :     },
     224                 :     {
     225                 :         "plus-sign", '+'
     226                 :     },
     227                 :     {
     228                 :         "comma", ','
     229                 :     },
     230                 :     {
     231                 :         "hyphen", '-'
     232                 :     },
     233                 :     {
     234                 :         "hyphen-minus", '-'
     235                 :     },
     236                 :     {
     237                 :         "period", '.'
     238                 :     },
     239                 :     {
     240                 :         "full-stop", '.'
     241                 :     },
     242                 :     {
     243                 :         "slash", '/'
     244                 :     },
     245                 :     {
     246                 :         "solidus", '/'
     247                 :     },
     248                 :     {
     249                 :         "zero", '0'
     250                 :     },
     251                 :     {
     252                 :         "one", '1'
     253                 :     },
     254                 :     {
     255                 :         "two", '2'
     256                 :     },
     257                 :     {
     258                 :         "three", '3'
     259                 :     },
     260                 :     {
     261                 :         "four", '4'
     262                 :     },
     263                 :     {
     264                 :         "five", '5'
     265                 :     },
     266                 :     {
     267                 :         "six", '6'
     268                 :     },
     269                 :     {
     270                 :         "seven", '7'
     271                 :     },
     272                 :     {
     273                 :         "eight", '8'
     274                 :     },
     275                 :     {
     276                 :         "nine", '9'
     277                 :     },
     278                 :     {
     279                 :         "colon", ':'
     280                 :     },
     281                 :     {
     282                 :         "semicolon", ';'
     283                 :     },
     284                 :     {
     285                 :         "less-than-sign", '<'
     286                 :     },
     287                 :     {
     288                 :         "equals-sign", '='
     289                 :     },
     290                 :     {
     291                 :         "greater-than-sign", '>'
     292                 :     },
     293                 :     {
     294                 :         "question-mark", '?'
     295                 :     },
     296                 :     {
     297                 :         "commercial-at", '@'
     298                 :     },
     299                 :     {
     300                 :         "left-square-bracket", '['
     301                 :     },
     302                 :     {
     303                 :         "backslash", '\\'
     304                 :     },
     305                 :     {
     306                 :         "reverse-solidus", '\\'
     307                 :     },
     308                 :     {
     309                 :         "right-square-bracket", ']'
     310                 :     },
     311                 :     {
     312                 :         "circumflex", '^'
     313                 :     },
     314                 :     {
     315                 :         "circumflex-accent", '^'
     316                 :     },
     317                 :     {
     318                 :         "underscore", '_'
     319                 :     },
     320                 :     {
     321                 :         "low-line", '_'
     322                 :     },
     323                 :     {
     324                 :         "grave-accent", '`'
     325                 :     },
     326                 :     {
     327                 :         "left-brace", '{'
     328                 :     },
     329                 :     {
     330                 :         "left-curly-bracket", '{'
     331                 :     },
     332                 :     {
     333                 :         "vertical-line", '|'
     334                 :     },
     335                 :     {
     336                 :         "right-brace", '}'
     337                 :     },
     338                 :     {
     339                 :         "right-curly-bracket", '}'
     340                 :     },
     341                 :     {
     342                 :         "tilde", '~'
     343                 :     },
     344                 :     {
     345                 :         "DEL", '\177'
     346                 :     },
     347                 :     {
     348                 :         NULL, 0
     349                 :     }
     350                 : };
     351                 : 
     352                 : /*
     353                 :  * The following array defines the valid character class names.
     354                 :  * The entries must match enum char_classes in regguts.h.
     355                 :  */
     356                 : static const char *const classNames[NUM_CCLASSES + 1] = {
     357                 :     "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
     358                 :     "lower", "print", "punct", "space", "upper", "xdigit", "word",
     359                 :     NULL
     360                 : };
     361                 : 
     362                 : /*
     363                 :  * We do not use the hard-wired Unicode classification tables that Tcl does.
     364                 :  * This is because (a) we need to deal with other encodings besides Unicode,
     365                 :  * and (b) we want to track the behavior of the libc locale routines as
     366                 :  * closely as possible.  For example, it wouldn't be unreasonable for a
     367                 :  * locale to not consider every Unicode letter as a letter.  So we build
     368                 :  * character classification cvecs by asking libc, even for Unicode.
     369                 :  */
     370                 : 
     371                 : 
     372                 : /*
     373                 :  * element - map collating-element name to chr
     374                 :  */
     375                 : static chr
     376 CBC          24 : element(struct vars *v,         /* context */
     377                 :         const chr *startp,      /* points to start of name */
     378                 :         const chr *endp)        /* points just past end of name */
     379                 : {
     380                 :     const struct cname *cn;
     381                 :     size_t      len;
     382                 : 
     383                 :     /* generic:  one-chr names stand for themselves */
     384              24 :     assert(startp < endp);
     385              24 :     len = endp - startp;
     386              24 :     if (len == 1)
     387              14 :         return *startp;
     388                 : 
     389              10 :     NOTE(REG_ULOCALE);
     390                 : 
     391                 :     /* search table */
     392             680 :     for (cn = cnames; cn->name != NULL; cn++)
     393                 :     {
     394             753 :         if (strlen(cn->name) == len &&
     395              77 :             pg_char_and_wchar_strncmp(cn->name, startp, len) == 0)
     396                 :         {
     397               6 :             break;              /* NOTE BREAK OUT */
     398                 :         }
     399                 :     }
     400              10 :     if (cn->name != NULL)
     401               6 :         return CHR(cn->code);
     402                 : 
     403                 :     /* couldn't find it */
     404               4 :     ERR(REG_ECOLLATE);
     405               4 :     return 0;
     406                 : }
     407                 : 
     408                 : /*
     409                 :  * range - supply cvec for a range, including legality check
     410                 :  */
     411                 : static struct cvec *
     412             533 : range(struct vars *v,           /* context */
     413                 :       chr a,                    /* range start */
     414                 :       chr b,                    /* range end, might equal a */
     415                 :       int cases)                /* case-independent? */
     416                 : {
     417                 :     int         nchrs;
     418                 :     struct cvec *cv;
     419                 :     chr         c,
     420                 :                 cc;
     421                 : 
     422             533 :     if (a != b && !before(a, b))
     423                 :     {
     424               2 :         ERR(REG_ERANGE);
     425               2 :         return NULL;
     426                 :     }
     427                 : 
     428             531 :     if (!cases)
     429                 :     {                           /* easy version */
     430             515 :         cv = getcvec(v, 0, 1);
     431             515 :         NOERRN();
     432             515 :         addrange(cv, a, b);
     433             515 :         return cv;
     434                 :     }
     435                 : 
     436                 :     /*
     437                 :      * When case-independent, it's hard to decide when cvec ranges are usable,
     438                 :      * so for now at least, we won't try.  We use a range for the originally
     439                 :      * specified chrs and then add on any case-equivalents that are outside
     440                 :      * that range as individual chrs.
     441                 :      *
     442                 :      * To ensure sane behavior if someone specifies a very large range, limit
     443                 :      * the allocation size to 100000 chrs (arbitrary) and check for overrun
     444                 :      * inside the loop below.
     445                 :      */
     446              16 :     nchrs = b - a + 1;
     447              16 :     if (nchrs <= 0 || nchrs > 100000)
     448 UBC           0 :         nchrs = 100000;
     449                 : 
     450 CBC          16 :     cv = getcvec(v, nchrs, 1);
     451              16 :     NOERRN();
     452              16 :     addrange(cv, a, b);
     453                 : 
     454            4407 :     for (c = a; c <= b; c++)
     455                 :     {
     456            4391 :         cc = pg_wc_tolower(c);
     457            4883 :         if (cc != c &&
     458             983 :             (before(cc, a) || before(b, cc)))
     459                 :         {
     460             221 :             if (cv->nchrs >= cv->chrspace)
     461                 :             {
     462 UBC           0 :                 ERR(REG_ETOOBIG);
     463               0 :                 return NULL;
     464                 :             }
     465 CBC         221 :             addchr(cv, cc);
     466                 :         }
     467            4391 :         cc = pg_wc_toupper(c);
     468            4870 :         if (cc != c &&
     469             755 :             (before(cc, a) || before(b, cc)))
     470                 :         {
     471             207 :             if (cv->nchrs >= cv->chrspace)
     472                 :             {
     473 UBC           0 :                 ERR(REG_ETOOBIG);
     474               0 :                 return NULL;
     475                 :             }
     476 CBC         207 :             addchr(cv, cc);
     477                 :         }
     478 GNC        4391 :         INTERRUPT(v->re);
     479                 :     }
     480                 : 
     481 GIC          16 :     return cv;
     482                 : }
     483                 : 
     484 ECB             : /*
     485                 :  * before - is chr x before chr y, for purposes of range legality?
     486                 :  */
     487                 : static int                      /* predicate */
     488 CBC        2261 : before(chr x, chr y)
     489                 : {
     490 GIC        2261 :     if (x < y)
     491             949 :         return 1;
     492            1312 :     return 0;
     493                 : }
     494                 : 
     495                 : /*
     496 ECB             :  * eclass - supply cvec for an equivalence class
     497                 :  * Must include case counterparts on request.
     498                 :  */
     499                 : static struct cvec *
     500 GIC          10 : eclass(struct vars *v,          /* context */
     501                 :        chr c,                   /* Collating element representing the
     502                 :                                  * equivalence class. */
     503                 :        int cases)               /* all cases? */
     504 ECB             : {
     505                 :     struct cvec *cv;
     506                 : 
     507                 :     /* crude fake equivalence class for testing */
     508 CBC          10 :     if ((v->cflags & REG_FAKE) && c == 'x')
     509 ECB             :     {
     510 GIC           6 :         cv = getcvec(v, 4, 0);
     511 GBC           6 :         addchr(cv, CHR('x'));
     512               6 :         addchr(cv, CHR('y'));
     513 GIC           6 :         if (cases)
     514 ECB             :         {
     515 UIC           0 :             addchr(cv, CHR('X'));
     516               0 :             addchr(cv, CHR('Y'));
     517                 :         }
     518 CBC           6 :         return cv;
     519 ECB             :     }
     520                 : 
     521                 :     /* otherwise, none */
     522 CBC           4 :     if (cases)
     523               2 :         return allcases(v, c);
     524 GIC           2 :     cv = getcvec(v, 1, 0);
     525               2 :     assert(cv != NULL);
     526               2 :     addchr(cv, c);
     527               2 :     return cv;
     528                 : }
     529                 : 
     530                 : /*
     531                 :  * lookupcclass - lookup a character class identified by name
     532 ECB             :  *
     533                 :  * On failure, sets an error code in *v; the result is then garbage.
     534                 :  */
     535                 : static enum char_classes
     536 GIC         131 : lookupcclass(struct vars *v,    /* context (for returning errors) */
     537                 :              const chr *startp, /* where the name starts */
     538                 :              const chr *endp)   /* just past the end of the name */
     539                 : {
     540                 :     size_t      len;
     541                 :     const char *const *namePtr;
     542                 :     int         i;
     543 ECB             : 
     544                 :     /*
     545                 :      * Map the name to the corresponding enumerated value.
     546                 :      */
     547 CBC         131 :     len = endp - startp;
     548             857 :     for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
     549                 :     {
     550 GIC        1613 :         if (strlen(*namePtr) == len &&
     551 CBC         760 :             pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0)
     552             127 :             return (enum char_classes) i;
     553                 :     }
     554                 : 
     555 GIC           4 :     ERR(REG_ECTYPE);
     556               4 :     return (enum char_classes) 0;
     557                 : }
     558                 : 
     559                 : /*
     560                 :  * cclasscvec - supply cvec for a character class
     561                 :  *
     562                 :  * Must include case counterparts if "cases" is true.
     563                 :  *
     564                 :  * The returned cvec might be either a transient cvec gotten from getcvec(),
     565 ECB             :  * or a permanently cached one from pg_ctype_get_cache().  This is okay
     566                 :  * because callers are not supposed to explicitly free the result either way.
     567                 :  */
     568                 : static struct cvec *
     569 CBC         402 : cclasscvec(struct vars *v,      /* context */
     570                 :            enum char_classes cclasscode,    /* class to build a cvec for */
     571                 :            int cases)           /* case-independent? */
     572                 : {
     573 GIC         402 :     struct cvec *cv = NULL;
     574                 : 
     575 ECB             :     /*
     576                 :      * Remap lower and upper to alpha if the match is case insensitive.
     577                 :      */
     578                 : 
     579 GIC         402 :     if (cases &&
     580               9 :         (cclasscode == CC_LOWER ||
     581                 :          cclasscode == CC_UPPER))
     582               1 :         cclasscode = CC_ALPHA;
     583                 : 
     584                 :     /*
     585                 :      * Now compute the character class contents.  For classes that are based
     586                 :      * on the behavior of a <wctype.h> or <ctype.h> function, we use
     587                 :      * pg_ctype_get_cache so that we can cache the results.  Other classes
     588                 :      * have definitions that are hard-wired here, and for those we just
     589                 :      * construct a transient cvec on the fly.
     590 ECB             :      *
     591                 :      * NB: keep this code in sync with cclass_column_index(), below.
     592                 :      */
     593                 : 
     594 CBC         402 :     switch (cclasscode)
     595 ECB             :     {
     596 CBC           7 :         case CC_PRINT:
     597               7 :             cv = pg_ctype_get_cache(pg_wc_isprint, cclasscode);
     598               7 :             break;
     599              11 :         case CC_ALNUM:
     600              11 :             cv = pg_ctype_get_cache(pg_wc_isalnum, cclasscode);
     601              11 :             break;
     602               8 :         case CC_ALPHA:
     603               8 :             cv = pg_ctype_get_cache(pg_wc_isalpha, cclasscode);
     604               8 :             break;
     605 GIC         128 :         case CC_WORD:
     606 CBC         128 :             cv = pg_ctype_get_cache(pg_wc_isword, cclasscode);
     607             128 :             break;
     608               1 :         case CC_ASCII:
     609 ECB             :             /* hard-wired meaning */
     610 CBC           1 :             cv = getcvec(v, 0, 1);
     611 GIC           1 :             if (cv)
     612 CBC           1 :                 addrange(cv, 0, 0x7f);
     613               1 :             break;
     614              35 :         case CC_BLANK:
     615 ECB             :             /* hard-wired meaning */
     616 CBC          35 :             cv = getcvec(v, 2, 0);
     617 GIC          35 :             addchr(cv, '\t');
     618 CBC          35 :             addchr(cv, ' ');
     619              35 :             break;
     620               1 :         case CC_CNTRL:
     621 ECB             :             /* hard-wired meaning */
     622 CBC           1 :             cv = getcvec(v, 0, 2);
     623               1 :             addrange(cv, 0x0, 0x1f);
     624               1 :             addrange(cv, 0x7f, 0x9f);
     625               1 :             break;
     626             126 :         case CC_DIGIT:
     627             126 :             cv = pg_ctype_get_cache(pg_wc_isdigit, cclasscode);
     628             126 :             break;
     629 GIC           7 :         case CC_PUNCT:
     630               7 :             cv = pg_ctype_get_cache(pg_wc_ispunct, cclasscode);
     631               7 :             break;
     632               2 :         case CC_XDIGIT:
     633                 : 
     634                 :             /*
     635 ECB             :              * It's not clear how to define this in non-western locales, and
     636                 :              * even less clear that there's any particular use in trying. So
     637                 :              * just hard-wire the meaning.
     638                 :              */
     639 CBC           2 :             cv = getcvec(v, 0, 3);
     640               2 :             if (cv)
     641                 :             {
     642               2 :                 addrange(cv, '0', '9');
     643               2 :                 addrange(cv, 'a', 'f');
     644               2 :                 addrange(cv, 'A', 'F');
     645 ECB             :             }
     646 CBC           2 :             break;
     647              53 :         case CC_SPACE:
     648              53 :             cv = pg_ctype_get_cache(pg_wc_isspace, cclasscode);
     649              53 :             break;
     650               7 :         case CC_LOWER:
     651               7 :             cv = pg_ctype_get_cache(pg_wc_islower, cclasscode);
     652               7 :             break;
     653               9 :         case CC_UPPER:
     654               9 :             cv = pg_ctype_get_cache(pg_wc_isupper, cclasscode);
     655 GIC           9 :             break;
     656               7 :         case CC_GRAPH:
     657               7 :             cv = pg_ctype_get_cache(pg_wc_isgraph, cclasscode);
     658 CBC           7 :             break;
     659 EUB             :     }
     660 ECB             : 
     661                 :     /* If cv is NULL now, the reason must be "out of memory" */
     662 GIC         402 :     if (cv == NULL)
     663 UIC           0 :         ERR(REG_ESPACE);
     664 GIC         402 :     return cv;
     665                 : }
     666                 : 
     667 ECB             : /*
     668                 :  * cclass_column_index - get appropriate high colormap column index for chr
     669                 :  */
     670                 : static int
     671 GIC          32 : cclass_column_index(struct colormap *cm, chr c)
     672 ECB             : {
     673 GIC          32 :     int         colnum = 0;
     674                 : 
     675                 :     /* Shouldn't go through all these pushups for simple chrs */
     676              32 :     assert(c > MAX_SIMPLE_CHR);
     677                 : 
     678 ECB             :     /*
     679                 :      * Note: we should not see requests to consider cclasses that are not
     680                 :      * treated as locale-specific by cclasscvec(), above.
     681                 :      */
     682 CBC          32 :     if (cm->classbits[CC_PRINT] && pg_wc_isprint(c))
     683               3 :         colnum |= cm->classbits[CC_PRINT];
     684              32 :     if (cm->classbits[CC_ALNUM] && pg_wc_isalnum(c))
     685              10 :         colnum |= cm->classbits[CC_ALNUM];
     686              32 :     if (cm->classbits[CC_ALPHA] && pg_wc_isalpha(c))
     687               5 :         colnum |= cm->classbits[CC_ALPHA];
     688              32 :     if (cm->classbits[CC_WORD] && pg_wc_isword(c))
     689               1 :         colnum |= cm->classbits[CC_WORD];
     690 GBC          32 :     assert(cm->classbits[CC_ASCII] == 0);
     691 CBC          32 :     assert(cm->classbits[CC_BLANK] == 0);
     692 GBC          32 :     assert(cm->classbits[CC_CNTRL] == 0);
     693 CBC          32 :     if (cm->classbits[CC_DIGIT] && pg_wc_isdigit(c))
     694 LBC           0 :         colnum |= cm->classbits[CC_DIGIT];
     695 GBC          32 :     if (cm->classbits[CC_PUNCT] && pg_wc_ispunct(c))
     696 LBC           0 :         colnum |= cm->classbits[CC_PUNCT];
     697 GBC          32 :     assert(cm->classbits[CC_XDIGIT] == 0);
     698 CBC          32 :     if (cm->classbits[CC_SPACE] && pg_wc_isspace(c))
     699 UBC           0 :         colnum |= cm->classbits[CC_SPACE];
     700 CBC          32 :     if (cm->classbits[CC_LOWER] && pg_wc_islower(c))
     701 LBC           0 :         colnum |= cm->classbits[CC_LOWER];
     702 GIC          32 :     if (cm->classbits[CC_UPPER] && pg_wc_isupper(c))
     703 LBC           0 :         colnum |= cm->classbits[CC_UPPER];
     704 GIC          32 :     if (cm->classbits[CC_GRAPH] && pg_wc_isgraph(c))
     705               3 :         colnum |= cm->classbits[CC_GRAPH];
     706                 : 
     707              32 :     return colnum;
     708                 : }
     709                 : 
     710                 : /*
     711                 :  * allcases - supply cvec for all case counterparts of a chr (including itself)
     712                 :  *
     713 ECB             :  * This is a shortcut, preferably an efficient one, for simple characters;
     714                 :  * messy cases are done via range().
     715                 :  */
     716                 : static struct cvec *
     717 GIC         867 : allcases(struct vars *v,        /* context */
     718                 :          chr c)                 /* character to get case equivs of */
     719                 : {
     720 ECB             :     struct cvec *cv;
     721                 :     chr         lc,
     722                 :                 uc;
     723                 : 
     724 CBC         867 :     lc = pg_wc_tolower(c);
     725             867 :     uc = pg_wc_toupper(c);
     726 ECB             : 
     727 CBC         867 :     cv = getcvec(v, 2, 0);
     728 GIC         867 :     addchr(cv, lc);
     729             867 :     if (lc != uc)
     730             737 :         addchr(cv, uc);
     731             867 :     return cv;
     732                 : }
     733                 : 
     734                 : /*
     735                 :  * cmp - chr-substring compare
     736                 :  *
     737                 :  * Backrefs need this.  It should preferably be efficient.
     738                 :  * Note that it does not need to report anything except equal/unequal.
     739 ECB             :  * Note also that the length is exact, and the comparison should not
     740                 :  * stop at embedded NULs!
     741                 :  */
     742                 : static int                      /* 0 for equal, nonzero for unequal */
     743 GIC         642 : cmp(const chr *x, const chr *y, /* strings to compare */
     744                 :     size_t len)                 /* exact length of comparison */
     745                 : {
     746             642 :     return memcmp(VS(x), VS(y), len * sizeof(chr));
     747                 : }
     748                 : 
     749                 : /*
     750                 :  * casecmp - case-independent chr-substring compare
     751                 :  *
     752                 :  * REG_ICASE backrefs need this.  It should preferably be efficient.
     753                 :  * Note that it does not need to report anything except equal/unequal.
     754 ECB             :  * Note also that the length is exact, and the comparison should not
     755                 :  * stop at embedded NULs!
     756                 :  */
     757                 : static int                      /* 0 for equal, nonzero for unequal */
     758 GIC           1 : casecmp(const chr *x, const chr *y, /* strings to compare */
     759 ECB             :         size_t len)             /* exact length of comparison */
     760 EUB             : {
     761 GIC           2 :     for (; len > 0; len--, x++, y++)
     762 ECB             :     {
     763 GIC           1 :         if ((*x != *y) && (pg_wc_tolower(*x) != pg_wc_tolower(*y)))
     764 UIC           0 :             return 1;
     765                 :     }
     766 GIC           1 :     return 0;
     767                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a