LCOV - differential code coverage report
Current view: top level - src/backend/regex - regc_locale.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 92.1 % 177 163 4 4 6 6 49 1 107 2 50 2
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 10 10 8 1 1 8
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 1 1 1
Legend: Lines: hit not hit (240..) days: 92.0 % 176 162 4 4 6 6 49 107 2 50
Function coverage date bins:
(240..) days: 55.6 % 18 10 8 1 1 8

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*
                                  2                 :  * regc_locale.c --
                                  3                 :  *
                                  4                 :  *  This file contains locale-specific regexp routines.
                                  5                 :  *  This file is #included by regcomp.c.
                                  6                 :  *
                                  7                 :  * Copyright (c) 1998 by Scriptics Corporation.
                                  8                 :  *
                                  9                 :  * This software is copyrighted by the Regents of the University of
                                 10                 :  * California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState
                                 11                 :  * Corporation and other parties.  The following terms apply to all files
                                 12                 :  * associated with the software unless explicitly disclaimed in
                                 13                 :  * individual files.
                                 14                 :  *
                                 15                 :  * The authors hereby grant permission to use, copy, modify, distribute,
                                 16                 :  * and license this software and its documentation for any purpose, provided
                                 17                 :  * that existing copyright notices are retained in all copies and that this
                                 18                 :  * notice is included verbatim in any distributions. No written agreement,
                                 19                 :  * license, or royalty fee is required for any of the authorized uses.
                                 20                 :  * Modifications to this software may be copyrighted by their authors
                                 21                 :  * and need not follow the licensing terms described here, provided that
                                 22                 :  * the new terms are clearly indicated on the first page of each file where
                                 23                 :  * they apply.
                                 24                 :  *
                                 25                 :  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
                                 26                 :  * FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
                                 27                 :  * ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
                                 28                 :  * DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
                                 29                 :  * POSSIBILITY OF SUCH DAMAGE.
                                 30                 :  *
                                 31                 :  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
                                 32                 :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
                                 33                 :  * FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
                                 34                 :  * IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
                                 35                 :  * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
                                 36                 :  * MODIFICATIONS.
                                 37                 :  *
                                 38                 :  * GOVERNMENT USE: If you are acquiring this software on behalf of the
                                 39                 :  * U.S. government, the Government shall have only "Restricted Rights"
                                 40                 :  * in the software and related documentation as defined in the Federal
                                 41                 :  * Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
                                 42                 :  * are acquiring the software on behalf of the Department of Defense, the
                                 43                 :  * software shall be classified as "Commercial Computer Software" and the
                                 44                 :  * Government shall have only "Restricted Rights" as defined in Clause
                                 45                 :  * 252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
                                 46                 :  * authors grant the U.S. Government and others acting in its behalf
                                 47                 :  * permission to use and distribute the software in accordance with the
                                 48                 :  * terms specified in this license.
                                 49                 :  *
                                 50                 :  * src/backend/regex/regc_locale.c
                                 51                 :  */
                                 52                 : 
                                 53                 : /* ASCII character-name table */
                                 54                 : 
                                 55                 : static const struct cname
                                 56                 : {
                                 57                 :     const char *name;
                                 58                 :     const char  code;
                                 59                 : }           cnames[] =
                                 60                 : 
                                 61                 : {
                                 62                 :     {
                                 63                 :         "NUL", '\0'
                                 64                 :     },
                                 65                 :     {
                                 66                 :         "SOH", '\001'
                                 67                 :     },
                                 68                 :     {
                                 69                 :         "STX", '\002'
                                 70                 :     },
                                 71                 :     {
                                 72                 :         "ETX", '\003'
                                 73                 :     },
                                 74                 :     {
                                 75                 :         "EOT", '\004'
                                 76                 :     },
                                 77                 :     {
                                 78                 :         "ENQ", '\005'
                                 79                 :     },
                                 80                 :     {
                                 81                 :         "ACK", '\006'
                                 82                 :     },
                                 83                 :     {
                                 84                 :         "BEL", '\007'
                                 85                 :     },
                                 86                 :     {
                                 87                 :         "alert", '\007'
                                 88                 :     },
                                 89                 :     {
                                 90                 :         "BS", '\010'
                                 91                 :     },
                                 92                 :     {
                                 93                 :         "backspace", '\b'
                                 94                 :     },
                                 95                 :     {
                                 96                 :         "HT", '\011'
                                 97                 :     },
                                 98                 :     {
                                 99                 :         "tab", '\t'
                                100                 :     },
                                101                 :     {
                                102                 :         "LF", '\012'
                                103                 :     },
                                104                 :     {
                                105                 :         "newline", '\n'
                                106                 :     },
                                107                 :     {
                                108                 :         "VT", '\013'
                                109                 :     },
                                110                 :     {
                                111                 :         "vertical-tab", '\v'
                                112                 :     },
                                113                 :     {
                                114                 :         "FF", '\014'
                                115                 :     },
                                116                 :     {
                                117                 :         "form-feed", '\f'
                                118                 :     },
                                119                 :     {
                                120                 :         "CR", '\015'
                                121                 :     },
                                122                 :     {
                                123                 :         "carriage-return", '\r'
                                124                 :     },
                                125                 :     {
                                126                 :         "SO", '\016'
                                127                 :     },
                                128                 :     {
                                129                 :         "SI", '\017'
                                130                 :     },
                                131                 :     {
                                132                 :         "DLE", '\020'
                                133                 :     },
                                134                 :     {
                                135                 :         "DC1", '\021'
                                136                 :     },
                                137                 :     {
                                138                 :         "DC2", '\022'
                                139                 :     },
                                140                 :     {
                                141                 :         "DC3", '\023'
                                142                 :     },
                                143                 :     {
                                144                 :         "DC4", '\024'
                                145                 :     },
                                146                 :     {
                                147                 :         "NAK", '\025'
                                148                 :     },
                                149                 :     {
                                150                 :         "SYN", '\026'
                                151                 :     },
                                152                 :     {
                                153                 :         "ETB", '\027'
                                154                 :     },
                                155                 :     {
                                156                 :         "CAN", '\030'
                                157                 :     },
                                158                 :     {
                                159                 :         "EM", '\031'
                                160                 :     },
                                161                 :     {
                                162                 :         "SUB", '\032'
                                163                 :     },
                                164                 :     {
                                165                 :         "ESC", '\033'
                                166                 :     },
                                167                 :     {
                                168                 :         "IS4", '\034'
                                169                 :     },
                                170                 :     {
                                171                 :         "FS", '\034'
                                172                 :     },
                                173                 :     {
                                174                 :         "IS3", '\035'
                                175                 :     },
                                176                 :     {
                                177                 :         "GS", '\035'
                                178                 :     },
                                179                 :     {
                                180                 :         "IS2", '\036'
                                181                 :     },
                                182                 :     {
                                183                 :         "RS", '\036'
                                184                 :     },
                                185                 :     {
                                186                 :         "IS1", '\037'
                                187                 :     },
                                188                 :     {
                                189                 :         "US", '\037'
                                190                 :     },
                                191                 :     {
                                192                 :         "space", ' '
                                193                 :     },
                                194                 :     {
                                195                 :         "exclamation-mark", '!'
                                196                 :     },
                                197                 :     {
                                198                 :         "quotation-mark", '"'
                                199                 :     },
                                200                 :     {
                                201                 :         "number-sign", '#'
                                202                 :     },
                                203                 :     {
                                204                 :         "dollar-sign", '$'
                                205                 :     },
                                206                 :     {
                                207                 :         "percent-sign", '%'
                                208                 :     },
                                209                 :     {
                                210                 :         "ampersand", '&'
                                211                 :     },
                                212                 :     {
                                213                 :         "apostrophe", '\''
                                214                 :     },
                                215                 :     {
                                216                 :         "left-parenthesis", '('
                                217                 :     },
                                218                 :     {
                                219                 :         "right-parenthesis", ')'
                                220                 :     },
                                221                 :     {
                                222                 :         "asterisk", '*'
                                223                 :     },
                                224                 :     {
                                225                 :         "plus-sign", '+'
                                226                 :     },
                                227                 :     {
                                228                 :         "comma", ','
                                229                 :     },
                                230                 :     {
                                231                 :         "hyphen", '-'
                                232                 :     },
                                233                 :     {
                                234                 :         "hyphen-minus", '-'
                                235                 :     },
                                236                 :     {
                                237                 :         "period", '.'
                                238                 :     },
                                239                 :     {
                                240                 :         "full-stop", '.'
                                241                 :     },
                                242                 :     {
                                243                 :         "slash", '/'
                                244                 :     },
                                245                 :     {
                                246                 :         "solidus", '/'
                                247                 :     },
                                248                 :     {
                                249                 :         "zero", '0'
                                250                 :     },
                                251                 :     {
                                252                 :         "one", '1'
                                253                 :     },
                                254                 :     {
                                255                 :         "two", '2'
                                256                 :     },
                                257                 :     {
                                258                 :         "three", '3'
                                259                 :     },
                                260                 :     {
                                261                 :         "four", '4'
                                262                 :     },
                                263                 :     {
                                264                 :         "five", '5'
                                265                 :     },
                                266                 :     {
                                267                 :         "six", '6'
                                268                 :     },
                                269                 :     {
                                270                 :         "seven", '7'
                                271                 :     },
                                272                 :     {
                                273                 :         "eight", '8'
                                274                 :     },
                                275                 :     {
                                276                 :         "nine", '9'
                                277                 :     },
                                278                 :     {
                                279                 :         "colon", ':'
                                280                 :     },
                                281                 :     {
                                282                 :         "semicolon", ';'
                                283                 :     },
                                284                 :     {
                                285                 :         "less-than-sign", '<'
                                286                 :     },
                                287                 :     {
                                288                 :         "equals-sign", '='
                                289                 :     },
                                290                 :     {
                                291                 :         "greater-than-sign", '>'
                                292                 :     },
                                293                 :     {
                                294                 :         "question-mark", '?'
                                295                 :     },
                                296                 :     {
                                297                 :         "commercial-at", '@'
                                298                 :     },
                                299                 :     {
                                300                 :         "left-square-bracket", '['
                                301                 :     },
                                302                 :     {
                                303                 :         "backslash", '\\'
                                304                 :     },
                                305                 :     {
                                306                 :         "reverse-solidus", '\\'
                                307                 :     },
                                308                 :     {
                                309                 :         "right-square-bracket", ']'
                                310                 :     },
                                311                 :     {
                                312                 :         "circumflex", '^'
                                313                 :     },
                                314                 :     {
                                315                 :         "circumflex-accent", '^'
                                316                 :     },
                                317                 :     {
                                318                 :         "underscore", '_'
                                319                 :     },
                                320                 :     {
                                321                 :         "low-line", '_'
                                322                 :     },
                                323                 :     {
                                324                 :         "grave-accent", '`'
                                325                 :     },
                                326                 :     {
                                327                 :         "left-brace", '{'
                                328                 :     },
                                329                 :     {
                                330                 :         "left-curly-bracket", '{'
                                331                 :     },
                                332                 :     {
                                333                 :         "vertical-line", '|'
                                334                 :     },
                                335                 :     {
                                336                 :         "right-brace", '}'
                                337                 :     },
                                338                 :     {
                                339                 :         "right-curly-bracket", '}'
                                340                 :     },
                                341                 :     {
                                342                 :         "tilde", '~'
                                343                 :     },
                                344                 :     {
                                345                 :         "DEL", '\177'
                                346                 :     },
                                347                 :     {
                                348                 :         NULL, 0
                                349                 :     }
                                350                 : };
                                351                 : 
                                352                 : /*
                                353                 :  * The following array defines the valid character class names.
                                354                 :  * The entries must match enum char_classes in regguts.h.
                                355                 :  */
                                356                 : static const char *const classNames[NUM_CCLASSES + 1] = {
                                357                 :     "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
                                358                 :     "lower", "print", "punct", "space", "upper", "xdigit", "word",
                                359                 :     NULL
                                360                 : };
                                361                 : 
                                362                 : /*
                                363                 :  * We do not use the hard-wired Unicode classification tables that Tcl does.
                                364                 :  * This is because (a) we need to deal with other encodings besides Unicode,
                                365                 :  * and (b) we want to track the behavior of the libc locale routines as
                                366                 :  * closely as possible.  For example, it wouldn't be unreasonable for a
                                367                 :  * locale to not consider every Unicode letter as a letter.  So we build
                                368                 :  * character classification cvecs by asking libc, even for Unicode.
                                369                 :  */
                                370                 : 
                                371                 : 
                                372                 : /*
                                373                 :  * element - map collating-element name to chr
                                374                 :  */
                                375                 : static chr
 2118 tgl                       376 CBC          24 : element(struct vars *v,         /* context */
                                377                 :         const chr *startp,      /* points to start of name */
                                378                 :         const chr *endp)        /* points just past end of name */
                                379                 : {
                                380                 :     const struct cname *cn;
                                381                 :     size_t      len;
                                382                 : 
                                383                 :     /* generic:  one-chr names stand for themselves */
 7188 bruce                     384              24 :     assert(startp < endp);
                                385              24 :     len = endp - startp;
                                386              24 :     if (len == 1)
                                387              14 :         return *startp;
                                388                 : 
                                389              10 :     NOTE(REG_ULOCALE);
                                390                 : 
                                391                 :     /* search table */
                                392             680 :     for (cn = cnames; cn->name != NULL; cn++)
                                393                 :     {
                                394             753 :         if (strlen(cn->name) == len &&
                                395              77 :             pg_char_and_wchar_strncmp(cn->name, startp, len) == 0)
                                396                 :         {
                                397               6 :             break;              /* NOTE BREAK OUT */
                                398                 :         }
                                399                 :     }
                                400              10 :     if (cn->name != NULL)
                                401               6 :         return CHR(cn->code);
                                402                 : 
                                403                 :     /* couldn't find it */
                                404               4 :     ERR(REG_ECOLLATE);
                                405               4 :     return 0;
                                406                 : }
                                407                 : 
                                408                 : /*
                                409                 :  * range - supply cvec for a range, including legality check
                                410                 :  */
                                411                 : static struct cvec *
 2118 tgl                       412             533 : range(struct vars *v,           /* context */
                                413                 :       chr a,                    /* range start */
                                414                 :       chr b,                    /* range end, might equal a */
                                415                 :       int cases)                /* case-independent? */
                                416                 : {
                                417                 :     int         nchrs;
                                418                 :     struct cvec *cv;
                                419                 :     chr         c,
                                420                 :                 cc;
                                421                 : 
 7188 bruce                     422             533 :     if (a != b && !before(a, b))
                                423                 :     {
                                424               2 :         ERR(REG_ERANGE);
                                425               2 :         return NULL;
                                426                 :     }
                                427                 : 
                                428             531 :     if (!cases)
                                429                 :     {                           /* easy version */
 5533 tgl                       430             515 :         cv = getcvec(v, 0, 1);
 7188 bruce                     431             515 :         NOERRN();
                                432             515 :         addrange(cv, a, b);
                                433             515 :         return cv;
                                434                 :     }
                                435                 : 
                                436                 :     /*
                                437                 :      * When case-independent, it's hard to decide when cvec ranges are usable,
                                438                 :      * so for now at least, we won't try.  We use a range for the originally
                                439                 :      * specified chrs and then add on any case-equivalents that are outside
                                440                 :      * that range as individual chrs.
                                441                 :      *
                                442                 :      * To ensure sane behavior if someone specifies a very large range, limit
                                443                 :      * the allocation size to 100000 chrs (arbitrary) and check for overrun
                                444                 :      * inside the loop below.
                                445                 :      */
 2617 tgl                       446              16 :     nchrs = b - a + 1;
                                447              16 :     if (nchrs <= 0 || nchrs > 100000)
 2617 tgl                       448 UBC           0 :         nchrs = 100000;
                                449                 : 
 2617 tgl                       450 CBC          16 :     cv = getcvec(v, nchrs, 1);
 7188 bruce                     451              16 :     NOERRN();
 2617 tgl                       452              16 :     addrange(cv, a, b);
                                453                 : 
 7188 bruce                     454            4407 :     for (c = a; c <= b; c++)
                                455                 :     {
 2424 tgl                       456            4391 :         cc = pg_wc_tolower(c);
 2617                           457            4883 :         if (cc != c &&
                                458             983 :             (before(cc, a) || before(b, cc)))
                                459                 :         {
                                460             221 :             if (cv->nchrs >= cv->chrspace)
                                461                 :             {
 2617 tgl                       462 UBC           0 :                 ERR(REG_ETOOBIG);
                                463               0 :                 return NULL;
                                464                 :             }
 2617 tgl                       465 CBC         221 :             addchr(cv, cc);
                                466                 :         }
 2424                           467            4391 :         cc = pg_wc_toupper(c);
 2617                           468            4870 :         if (cc != c &&
                                469             755 :             (before(cc, a) || before(b, cc)))
                                470                 :         {
                                471             207 :             if (cv->nchrs >= cv->chrspace)
                                472                 :             {
 2617 tgl                       473 UBC           0 :                 ERR(REG_ETOOBIG);
                                474               0 :                 return NULL;
                                475                 :             }
 2617 tgl                       476 CBC         207 :             addchr(cv, cc);
                                477                 :         }
    1 tmunro                    478 GNC        4391 :         INTERRUPT(v->re);
                                479                 :     }
                                480                 : 
 7188 bruce                     481 GIC          16 :     return cv;
                                482                 : }
                                483                 : 
 7368 tgl                       484 ECB             : /*
                                485                 :  * before - is chr x before chr y, for purposes of range legality?
                                486                 :  */
 7188 bruce                     487                 : static int                      /* predicate */
 2424 tgl                       488 CBC        2261 : before(chr x, chr y)
                                489                 : {
 7188 bruce                     490 GIC        2261 :     if (x < y)
                                491             949 :         return 1;
                                492            1312 :     return 0;
                                493                 : }
                                494                 : 
                                495                 : /*
 7368 tgl                       496 ECB             :  * eclass - supply cvec for an equivalence class
                                497                 :  * Must include case counterparts on request.
                                498                 :  */
                                499                 : static struct cvec *
 2118 tgl                       500 GIC          10 : eclass(struct vars *v,          /* context */
                                501                 :        chr c,                   /* Collating element representing the
                                502                 :                                  * equivalence class. */
                                503                 :        int cases)               /* all cases? */
 7368 tgl                       504 ECB             : {
                                505                 :     struct cvec *cv;
 7188 bruce                     506                 : 
                                507                 :     /* crude fake equivalence class for testing */
 7188 bruce                     508 CBC          10 :     if ((v->cflags & REG_FAKE) && c == 'x')
 7188 bruce                     509 ECB             :     {
 5533 tgl                       510 GIC           6 :         cv = getcvec(v, 4, 0);
 2424 tgl                       511 GBC           6 :         addchr(cv, CHR('x'));
                                512               6 :         addchr(cv, CHR('y'));
 7188 bruce                     513 GIC           6 :         if (cases)
 7188 bruce                     514 ECB             :         {
 2424 tgl                       515 UIC           0 :             addchr(cv, CHR('X'));
                                516               0 :             addchr(cv, CHR('Y'));
                                517                 :         }
 7188 bruce                     518 CBC           6 :         return cv;
 7368 tgl                       519 ECB             :     }
 7188 bruce                     520                 : 
                                521                 :     /* otherwise, none */
 7188 bruce                     522 CBC           4 :     if (cases)
                                523               2 :         return allcases(v, c);
 5533 tgl                       524 GIC           2 :     cv = getcvec(v, 1, 0);
 7188 bruce                     525               2 :     assert(cv != NULL);
 2424 tgl                       526               2 :     addchr(cv, c);
 7368                           527               2 :     return cv;
                                528                 : }
                                529                 : 
                                530                 : /*
                                531                 :  * lookupcclass - lookup a character class identified by name
 4067 tgl                       532 ECB             :  *
                                533                 :  * On failure, sets an error code in *v; the result is then garbage.
                                534                 :  */
                                535                 : static enum char_classes
  773 tgl                       536 GIC         131 : lookupcclass(struct vars *v,    /* context (for returning errors) */
                                537                 :              const chr *startp, /* where the name starts */
                                538                 :              const chr *endp)   /* just past the end of the name */
                                539                 : {
                                540                 :     size_t      len;
                                541                 :     const char *const *namePtr;
                                542                 :     int         i;
 7188 bruce                     543 ECB             : 
                                544                 :     /*
                                545                 :      * Map the name to the corresponding enumerated value.
                                546                 :      */
 7188 bruce                     547 CBC         131 :     len = endp - startp;
                                548             857 :     for (namePtr = classNames, i = 0; *namePtr != NULL; namePtr++, i++)
                                549                 :     {
 7188 bruce                     550 GIC        1613 :         if (strlen(*namePtr) == len &&
 7188 bruce                     551 CBC         760 :             pg_char_and_wchar_strncmp(*namePtr, startp, len) == 0)
  773 tgl                       552             127 :             return (enum char_classes) i;
                                553                 :     }
                                554                 : 
  773 tgl                       555 GIC           4 :     ERR(REG_ECTYPE);
                                556               4 :     return (enum char_classes) 0;
                                557                 : }
                                558                 : 
                                559                 : /*
                                560                 :  * cclasscvec - supply cvec for a character class
                                561                 :  *
                                562                 :  * Must include case counterparts if "cases" is true.
                                563                 :  *
                                564                 :  * The returned cvec might be either a transient cvec gotten from getcvec(),
  773 tgl                       565 ECB             :  * or a permanently cached one from pg_ctype_get_cache().  This is okay
                                566                 :  * because callers are not supposed to explicitly free the result either way.
                                567                 :  */
                                568                 : static struct cvec *
  773 tgl                       569 CBC         402 : cclasscvec(struct vars *v,      /* context */
                                570                 :            enum char_classes cclasscode,    /* class to build a cvec for */
                                571                 :            int cases)           /* case-independent? */
                                572                 : {
  773 tgl                       573 GIC         402 :     struct cvec *cv = NULL;
                                574                 : 
 7188 bruce                     575 ECB             :     /*
                                576                 :      * Remap lower and upper to alpha if the match is case insensitive.
                                577                 :      */
 7368 tgl                       578                 : 
 7188 bruce                     579 GIC         402 :     if (cases &&
  773 tgl                       580               9 :         (cclasscode == CC_LOWER ||
                                581                 :          cclasscode == CC_UPPER))
                                582               1 :         cclasscode = CC_ALPHA;
                                583                 : 
                                584                 :     /*
                                585                 :      * Now compute the character class contents.  For classes that are based
                                586                 :      * on the behavior of a <wctype.h> or <ctype.h> function, we use
                                587                 :      * pg_ctype_get_cache so that we can cache the results.  Other classes
                                588                 :      * have definitions that are hard-wired here, and for those we just
                                589                 :      * construct a transient cvec on the fly.
 2407 tgl                       590 ECB             :      *
                                591                 :      * NB: keep this code in sync with cclass_column_index(), below.
 7188 bruce                     592                 :      */
                                593                 : 
  773 tgl                       594 CBC         402 :     switch (cclasscode)
 7188 bruce                     595 ECB             :     {
 7188 bruce                     596 CBC           7 :         case CC_PRINT:
  773 tgl                       597               7 :             cv = pg_ctype_get_cache(pg_wc_isprint, cclasscode);
 7132                           598               7 :             break;
 7188 bruce                     599              11 :         case CC_ALNUM:
  773 tgl                       600              11 :             cv = pg_ctype_get_cache(pg_wc_isalnum, cclasscode);
 7188 bruce                     601              11 :             break;
                                602               8 :         case CC_ALPHA:
  773 tgl                       603               8 :             cv = pg_ctype_get_cache(pg_wc_isalpha, cclasscode);
                                604               8 :             break;
  773 tgl                       605 GIC         128 :         case CC_WORD:
  773 tgl                       606 CBC         128 :             cv = pg_ctype_get_cache(pg_wc_isword, cclasscode);
 7188 bruce                     607             128 :             break;
                                608               1 :         case CC_ASCII:
 4067 tgl                       609 ECB             :             /* hard-wired meaning */
 5533 tgl                       610 CBC           1 :             cv = getcvec(v, 0, 1);
 7188 bruce                     611 GIC           1 :             if (cv)
 7188 bruce                     612 CBC           1 :                 addrange(cv, 0, 0x7f);
                                613               1 :             break;
                                614              35 :         case CC_BLANK:
 4067 tgl                       615 ECB             :             /* hard-wired meaning */
 5533 tgl                       616 CBC          35 :             cv = getcvec(v, 2, 0);
 7188 bruce                     617 GIC          35 :             addchr(cv, '\t');
 7188 bruce                     618 CBC          35 :             addchr(cv, ' ');
                                619              35 :             break;
                                620               1 :         case CC_CNTRL:
 4067 tgl                       621 ECB             :             /* hard-wired meaning */
 5533 tgl                       622 CBC           1 :             cv = getcvec(v, 0, 2);
 7188 bruce                     623               1 :             addrange(cv, 0x0, 0x1f);
                                624               1 :             addrange(cv, 0x7f, 0x9f);
                                625               1 :             break;
                                626             126 :         case CC_DIGIT:
  773 tgl                       627             126 :             cv = pg_ctype_get_cache(pg_wc_isdigit, cclasscode);
 7188 bruce                     628             126 :             break;
 7188 bruce                     629 GIC           7 :         case CC_PUNCT:
  773 tgl                       630               7 :             cv = pg_ctype_get_cache(pg_wc_ispunct, cclasscode);
 7188 bruce                     631               7 :             break;
                                632               2 :         case CC_XDIGIT:
                                633                 : 
                                634                 :             /*
 4067 tgl                       635 ECB             :              * It's not clear how to define this in non-western locales, and
 3955 bruce                     636                 :              * even less clear that there's any particular use in trying. So
                                637                 :              * just hard-wire the meaning.
 4067 tgl                       638                 :              */
 5533 tgl                       639 CBC           2 :             cv = getcvec(v, 0, 3);
 7188 bruce                     640               2 :             if (cv)
                                641                 :             {
                                642               2 :                 addrange(cv, '0', '9');
                                643               2 :                 addrange(cv, 'a', 'f');
                                644               2 :                 addrange(cv, 'A', 'F');
 7188 bruce                     645 ECB             :             }
 7188 bruce                     646 CBC           2 :             break;
                                647              53 :         case CC_SPACE:
  773 tgl                       648              53 :             cv = pg_ctype_get_cache(pg_wc_isspace, cclasscode);
 7188 bruce                     649              53 :             break;
                                650               7 :         case CC_LOWER:
  773 tgl                       651               7 :             cv = pg_ctype_get_cache(pg_wc_islower, cclasscode);
 7188 bruce                     652               7 :             break;
                                653               9 :         case CC_UPPER:
  773 tgl                       654               9 :             cv = pg_ctype_get_cache(pg_wc_isupper, cclasscode);
 7188 bruce                     655 GIC           9 :             break;
                                656               7 :         case CC_GRAPH:
  773 tgl                       657               7 :             cv = pg_ctype_get_cache(pg_wc_isgraph, cclasscode);
 7188 bruce                     658 CBC           7 :             break;
 7368 tgl                       659 EUB             :     }
 4067 tgl                       660 ECB             : 
                                661                 :     /* If cv is NULL now, the reason must be "out of memory" */
 7188 bruce                     662 GIC         402 :     if (cv == NULL)
 7188 bruce                     663 UIC           0 :         ERR(REG_ESPACE);
 7188 bruce                     664 GIC         402 :     return cv;
                                665                 : }
                                666                 : 
 2407 tgl                       667 ECB             : /*
                                668                 :  * cclass_column_index - get appropriate high colormap column index for chr
                                669                 :  */
                                670                 : static int
 2118 tgl                       671 GIC          32 : cclass_column_index(struct colormap *cm, chr c)
 2407 tgl                       672 ECB             : {
 2407 tgl                       673 GIC          32 :     int         colnum = 0;
                                674                 : 
                                675                 :     /* Shouldn't go through all these pushups for simple chrs */
                                676              32 :     assert(c > MAX_SIMPLE_CHR);
                                677                 : 
 2407 tgl                       678 ECB             :     /*
                                679                 :      * Note: we should not see requests to consider cclasses that are not
  773                           680                 :      * treated as locale-specific by cclasscvec(), above.
 2407                           681                 :      */
 2407 tgl                       682 CBC          32 :     if (cm->classbits[CC_PRINT] && pg_wc_isprint(c))
                                683               3 :         colnum |= cm->classbits[CC_PRINT];
                                684              32 :     if (cm->classbits[CC_ALNUM] && pg_wc_isalnum(c))
                                685              10 :         colnum |= cm->classbits[CC_ALNUM];
                                686              32 :     if (cm->classbits[CC_ALPHA] && pg_wc_isalpha(c))
                                687               5 :         colnum |= cm->classbits[CC_ALPHA];
  773                           688              32 :     if (cm->classbits[CC_WORD] && pg_wc_isword(c))
                                689               1 :         colnum |= cm->classbits[CC_WORD];
 2407 tgl                       690 GBC          32 :     assert(cm->classbits[CC_ASCII] == 0);
 2407 tgl                       691 CBC          32 :     assert(cm->classbits[CC_BLANK] == 0);
 2407 tgl                       692 GBC          32 :     assert(cm->classbits[CC_CNTRL] == 0);
 2407 tgl                       693 CBC          32 :     if (cm->classbits[CC_DIGIT] && pg_wc_isdigit(c))
 2407 tgl                       694 LBC           0 :         colnum |= cm->classbits[CC_DIGIT];
 2407 tgl                       695 GBC          32 :     if (cm->classbits[CC_PUNCT] && pg_wc_ispunct(c))
 2407 tgl                       696 LBC           0 :         colnum |= cm->classbits[CC_PUNCT];
 2407 tgl                       697 GBC          32 :     assert(cm->classbits[CC_XDIGIT] == 0);
 2407 tgl                       698 CBC          32 :     if (cm->classbits[CC_SPACE] && pg_wc_isspace(c))
 2407 tgl                       699 UBC           0 :         colnum |= cm->classbits[CC_SPACE];
 2407 tgl                       700 CBC          32 :     if (cm->classbits[CC_LOWER] && pg_wc_islower(c))
 2407 tgl                       701 LBC           0 :         colnum |= cm->classbits[CC_LOWER];
 2407 tgl                       702 GIC          32 :     if (cm->classbits[CC_UPPER] && pg_wc_isupper(c))
 2407 tgl                       703 LBC           0 :         colnum |= cm->classbits[CC_UPPER];
 2407 tgl                       704 GIC          32 :     if (cm->classbits[CC_GRAPH] && pg_wc_isgraph(c))
                                705               3 :         colnum |= cm->classbits[CC_GRAPH];
                                706                 : 
                                707              32 :     return colnum;
                                708                 : }
                                709                 : 
                                710                 : /*
                                711                 :  * allcases - supply cvec for all case counterparts of a chr (including itself)
                                712                 :  *
 7368 tgl                       713 ECB             :  * This is a shortcut, preferably an efficient one, for simple characters;
                                714                 :  * messy cases are done via range().
                                715                 :  */
                                716                 : static struct cvec *
 2118 tgl                       717 GIC         867 : allcases(struct vars *v,        /* context */
                                718                 :          chr c)                 /* character to get case equivs of */
                                719                 : {
 7188 bruce                     720 ECB             :     struct cvec *cv;
                                721                 :     chr         lc,
                                722                 :                 uc;
 7368 tgl                       723                 : 
 2424 tgl                       724 CBC         867 :     lc = pg_wc_tolower(c);
                                725             867 :     uc = pg_wc_toupper(c);
 7368 tgl                       726 ECB             : 
 5533 tgl                       727 CBC         867 :     cv = getcvec(v, 2, 0);
 7188 bruce                     728 GIC         867 :     addchr(cv, lc);
                                729             867 :     if (lc != uc)
                                730             737 :         addchr(cv, uc);
                                731             867 :     return cv;
                                732                 : }
                                733                 : 
                                734                 : /*
                                735                 :  * cmp - chr-substring compare
                                736                 :  *
                                737                 :  * Backrefs need this.  It should preferably be efficient.
                                738                 :  * Note that it does not need to report anything except equal/unequal.
 7368 tgl                       739 ECB             :  * Note also that the length is exact, and the comparison should not
                                740                 :  * stop at embedded NULs!
                                741                 :  */
 7188 bruce                     742                 : static int                      /* 0 for equal, nonzero for unequal */
 7184 bruce                     743 GIC         642 : cmp(const chr *x, const chr *y, /* strings to compare */
                                744                 :     size_t len)                 /* exact length of comparison */
                                745                 : {
 7188                           746             642 :     return memcmp(VS(x), VS(y), len * sizeof(chr));
                                747                 : }
                                748                 : 
                                749                 : /*
                                750                 :  * casecmp - case-independent chr-substring compare
                                751                 :  *
                                752                 :  * REG_ICASE backrefs need this.  It should preferably be efficient.
                                753                 :  * Note that it does not need to report anything except equal/unequal.
 7368 tgl                       754 ECB             :  * Note also that the length is exact, and the comparison should not
                                755                 :  * stop at embedded NULs!
                                756                 :  */
 7188 bruce                     757                 : static int                      /* 0 for equal, nonzero for unequal */
 2118 tgl                       758 GIC           1 : casecmp(const chr *x, const chr *y, /* strings to compare */
 7368 tgl                       759 ECB             :         size_t len)             /* exact length of comparison */
 7368 tgl                       760 EUB             : {
 7188 bruce                     761 GIC           2 :     for (; len > 0; len--, x++, y++)
 7188 bruce                     762 ECB             :     {
 6911 tgl                       763 GIC           1 :         if ((*x != *y) && (pg_wc_tolower(*x) != pg_wc_tolower(*y)))
 7188 bruce                     764 UIC           0 :             return 1;
                                765                 :     }
 7188 bruce                     766 GIC           1 :     return 0;
                                767                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a