LCOV - differential code coverage report
Current view: top level - src/backend/regex - regexport.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage 16@8cea358b128 vs 17@8cea358b128 Lines: 89.5 % 76 68 8 68
Current Date: 2024-04-14 14:21:10 Functions: 90.9 % 11 10 1 10
Baseline: 16@8cea358b128 Branches: 61.4 % 88 54 34 54
Baseline Date: 2024-04-14 14:21:09 Line coverage date bins:
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed (240..) days: 89.5 % 76 68 8 68
Function coverage date bins:
(240..) days: 90.9 % 11 10 1 10
Branch coverage date bins:
(240..) days: 61.4 % 88 54 34 54

 Age         Owner                    Branch data    TLA  Line data    Source code
                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * regexport.c
                                  4                 :                :  *    Functions for exporting info about a regex's NFA
                                  5                 :                :  *
                                  6                 :                :  * In this implementation, the NFA defines a necessary but not sufficient
                                  7                 :                :  * condition for a string to match the regex: that is, there can be strings
                                  8                 :                :  * that match the NFA but don't match the full regex, but not vice versa.
                                  9                 :                :  * Thus, for example, it is okay for the functions below to treat lookaround
                                 10                 :                :  * constraints as no-ops, since they merely constrain the string some more.
                                 11                 :                :  *
                                 12                 :                :  * Notice that these functions return info into caller-provided arrays
                                 13                 :                :  * rather than doing their own malloc's.  This simplifies the APIs by
                                 14                 :                :  * eliminating a class of error conditions, and in the case of colors
                                 15                 :                :  * allows the caller to decide how big is too big to bother with.
                                 16                 :                :  *
                                 17                 :                :  *
                                 18                 :                :  * Portions Copyright (c) 2013-2024, PostgreSQL Global Development Group
                                 19                 :                :  * Portions Copyright (c) 1998, 1999 Henry Spencer
                                 20                 :                :  *
                                 21                 :                :  * IDENTIFICATION
                                 22                 :                :  *    src/backend/regex/regexport.c
                                 23                 :                :  *
                                 24                 :                :  *-------------------------------------------------------------------------
                                 25                 :                :  */
                                 26                 :                : 
                                 27                 :                : #include "regex/regguts.h"
                                 28                 :                : 
                                 29                 :                : #include "regex/regexport.h"
                                 30                 :                : 
                                 31                 :                : 
                                 32                 :                : /*
                                 33                 :                :  * Get total number of NFA states.
                                 34                 :                :  */
                                 35                 :                : int
 4023 tgl@sss.pgh.pa.us          36                 :UBC           0 : pg_reg_getnumstates(const regex_t *regex)
                                 37                 :                : {
                                 38                 :                :     struct cnfa *cnfa;
                                 39                 :                : 
                                 40   [ #  #  #  # ]:              0 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                 41                 :              0 :     cnfa = &((struct guts *) regex->re_guts)->search;
                                 42                 :                : 
                                 43                 :              0 :     return cnfa->nstates;
                                 44                 :                : }
                                 45                 :                : 
                                 46                 :                : /*
                                 47                 :                :  * Get initial state of NFA.
                                 48                 :                :  */
                                 49                 :                : int
 4023 tgl@sss.pgh.pa.us          50                 :CBC          65 : pg_reg_getinitialstate(const regex_t *regex)
                                 51                 :                : {
                                 52                 :                :     struct cnfa *cnfa;
                                 53                 :                : 
                                 54   [ +  -  -  + ]:             65 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                 55                 :             65 :     cnfa = &((struct guts *) regex->re_guts)->search;
                                 56                 :                : 
                                 57                 :             65 :     return cnfa->pre;
                                 58                 :                : }
                                 59                 :                : 
                                 60                 :                : /*
                                 61                 :                :  * Get final state of NFA.
                                 62                 :                :  */
                                 63                 :                : int
                                 64                 :            963 : pg_reg_getfinalstate(const regex_t *regex)
                                 65                 :                : {
                                 66                 :                :     struct cnfa *cnfa;
                                 67                 :                : 
                                 68   [ +  -  -  + ]:            963 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                 69                 :            963 :     cnfa = &((struct guts *) regex->re_guts)->search;
                                 70                 :                : 
                                 71                 :            963 :     return cnfa->post;
                                 72                 :                : }
                                 73                 :                : 
                                 74                 :                : /*
                                 75                 :                :  * pg_reg_getnumoutarcs() and pg_reg_getoutarcs() mask the existence of LACON
                                 76                 :                :  * arcs from the caller, treating any LACON as being automatically satisfied.
                                 77                 :                :  * Since the output representation does not support arcs that consume no
                                 78                 :                :  * character when traversed, we have to recursively traverse LACON arcs here,
                                 79                 :                :  * and report whatever normal arcs are reachable by traversing LACON arcs.
                                 80                 :                :  * Note that this wouldn't work if it were possible to reach the final state
                                 81                 :                :  * via LACON traversal, but the regex library never builds NFAs that have
                                 82                 :                :  * LACON arcs leading directly to the final state.  (This is because the
                                 83                 :                :  * regex executor is designed to consume one character beyond the nominal
                                 84                 :                :  * match end --- possibly an EOS indicator --- so there is always a set of
                                 85                 :                :  * ordinary arcs leading to the final state.)
                                 86                 :                :  *
                                 87                 :                :  * traverse_lacons is a recursive subroutine used by both exported functions
                                 88                 :                :  * to count and then emit the reachable regular arcs.  *arcs_count is
                                 89                 :                :  * incremented by the number of reachable arcs, and as many as will fit in
                                 90                 :                :  * arcs_len (possibly 0) are emitted into arcs[].
                                 91                 :                :  */
                                 92                 :                : static void
 2489                            93                 :           3290 : traverse_lacons(struct cnfa *cnfa, int st,
                                 94                 :                :                 int *arcs_count,
                                 95                 :                :                 regex_arc_t *arcs, int arcs_len)
                                 96                 :                : {
                                 97                 :                :     struct carc *ca;
                                 98                 :                : 
                                 99                 :                :     /*
                                100                 :                :      * Since this function recurses, it could theoretically be driven to stack
                                101                 :                :      * overflow.  In practice, this is mostly useful to backstop against a
                                102                 :                :      * failure of the regex compiler to remove a loop of LACON arcs.
                                103                 :                :      */
 2558                           104                 :           3290 :     check_stack_depth();
                                105                 :                : 
                                106         [ +  + ]:           8668 :     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
                                107                 :                :     {
                                108         [ +  + ]:           5378 :         if (ca->co < cnfa->ncolors)
                                109                 :                :         {
                                110                 :                :             /* Ordinary arc, so count and possibly emit it */
                                111                 :           5372 :             int         ndx = (*arcs_count)++;
                                112                 :                : 
                                113         [ +  + ]:           5372 :             if (ndx < arcs_len)
                                114                 :                :             {
                                115                 :           2686 :                 arcs[ndx].co = ca->co;
                                116                 :           2686 :                 arcs[ndx].to = ca->to;
                                117                 :                :             }
                                118                 :                :         }
                                119                 :                :         else
                                120                 :                :         {
                                121                 :                :             /* LACON arc --- assume it's satisfied and recurse... */
                                122                 :                :             /* ... but first, assert it doesn't lead directly to post state */
                                123         [ -  + ]:              6 :             Assert(ca->to != cnfa->post);
                                124                 :                : 
                                125                 :              6 :             traverse_lacons(cnfa, ca->to, arcs_count, arcs, arcs_len);
                                126                 :                :         }
                                127                 :                :     }
                                128                 :           3290 : }
                                129                 :                : 
                                130                 :                : /*
                                131                 :                :  * Get number of outgoing NFA arcs of state number "st".
                                132                 :                :  */
                                133                 :                : int
 4023                           134                 :           1644 : pg_reg_getnumoutarcs(const regex_t *regex, int st)
                                135                 :                : {
                                136                 :                :     struct cnfa *cnfa;
                                137                 :                :     int         arcs_count;
                                138                 :                : 
                                139   [ +  -  -  + ]:           1644 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                140                 :           1644 :     cnfa = &((struct guts *) regex->re_guts)->search;
                                141                 :                : 
                                142   [ +  -  -  + ]:           1644 :     if (st < 0 || st >= cnfa->nstates)
 4023 tgl@sss.pgh.pa.us         143                 :UBC           0 :         return 0;
 2558 tgl@sss.pgh.pa.us         144                 :CBC        1644 :     arcs_count = 0;
                                145                 :           1644 :     traverse_lacons(cnfa, st, &arcs_count, NULL, 0);
                                146                 :           1644 :     return arcs_count;
                                147                 :                : }
                                148                 :                : 
                                149                 :                : /*
                                150                 :                :  * Write array of outgoing NFA arcs of state number "st" into arcs[],
                                151                 :                :  * whose length arcs_len must be at least as long as indicated by
                                152                 :                :  * pg_reg_getnumoutarcs(), else not all arcs will be returned.
                                153                 :                :  */
                                154                 :                : void
 4023                           155                 :           1644 : pg_reg_getoutarcs(const regex_t *regex, int st,
                                156                 :                :                   regex_arc_t *arcs, int arcs_len)
                                157                 :                : {
                                158                 :                :     struct cnfa *cnfa;
                                159                 :                :     int         arcs_count;
                                160                 :                : 
                                161   [ +  -  -  + ]:           1644 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                162                 :           1644 :     cnfa = &((struct guts *) regex->re_guts)->search;
                                163                 :                : 
                                164   [ +  -  +  -  :           1644 :     if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
                                              +  + ]
                                165                 :              4 :         return;
 2558                           166                 :           1640 :     arcs_count = 0;
                                167                 :           1640 :     traverse_lacons(cnfa, st, &arcs_count, arcs, arcs_len);
                                168                 :                : }
                                169                 :                : 
                                170                 :                : /*
                                171                 :                :  * Get total number of colors.
                                172                 :                :  */
                                173                 :                : int
 4023                           174                 :             65 : pg_reg_getnumcolors(const regex_t *regex)
                                175                 :                : {
                                176                 :                :     struct colormap *cm;
                                177                 :                : 
                                178   [ +  -  -  + ]:             65 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                179                 :             65 :     cm = &((struct guts *) regex->re_guts)->cmap;
                                180                 :                : 
                                181                 :             65 :     return cm->max + 1;
                                182                 :                : }
                                183                 :                : 
                                184                 :                : /*
                                185                 :                :  * Check if color is beginning of line/string.
                                186                 :                :  *
                                187                 :                :  * (We might at some point need to offer more refined handling of pseudocolors,
                                188                 :                :  * but this will do for now.)
                                189                 :                :  */
                                190                 :                : int
                                191                 :           1505 : pg_reg_colorisbegin(const regex_t *regex, int co)
                                192                 :                : {
                                193                 :                :     struct cnfa *cnfa;
                                194                 :                : 
                                195   [ +  -  -  + ]:           1505 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                196                 :           1505 :     cnfa = &((struct guts *) regex->re_guts)->search;
                                197                 :                : 
                                198   [ +  +  +  + ]:           1505 :     if (co == cnfa->bos[0] || co == cnfa->bos[1])
                                199                 :            246 :         return true;
                                200                 :                :     else
                                201                 :           1259 :         return false;
                                202                 :                : }
                                203                 :                : 
                                204                 :                : /*
                                205                 :                :  * Check if color is end of line/string.
                                206                 :                :  */
                                207                 :                : int
                                208                 :           1259 : pg_reg_colorisend(const regex_t *regex, int co)
                                209                 :                : {
                                210                 :                :     struct cnfa *cnfa;
                                211                 :                : 
                                212   [ +  -  -  + ]:           1259 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                213                 :           1259 :     cnfa = &((struct guts *) regex->re_guts)->search;
                                214                 :                : 
                                215   [ +  +  +  + ]:           1259 :     if (co == cnfa->eos[0] || co == cnfa->eos[1])
                                216                 :            162 :         return true;
                                217                 :                :     else
                                218                 :           1097 :         return false;
                                219                 :                : }
                                220                 :                : 
                                221                 :                : /*
                                222                 :                :  * Get number of member chrs of color number "co".
                                223                 :                :  *
                                224                 :                :  * Note: we return -1 if the color number is invalid, or if it is a special
                                225                 :                :  * color (WHITE, RAINBOW, or a pseudocolor), or if the number of members is
                                226                 :                :  * uncertain.
                                227                 :                :  * Callers should not try to extract the members if -1 is returned.
                                228                 :                :  */
                                229                 :                : int
                                230                 :            531 : pg_reg_getnumcharacters(const regex_t *regex, int co)
                                231                 :                : {
                                232                 :                :     struct colormap *cm;
                                233                 :                : 
                                234   [ +  -  -  + ]:            531 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                235                 :            531 :     cm = &((struct guts *) regex->re_guts)->cmap;
                                236                 :                : 
 1149                           237   [ +  +  -  + ]:            531 :     if (co <= 0 || co > cm->max)   /* <= 0 rejects WHITE and RAINBOW */
 4023                           238                 :             65 :         return -1;
 2489                           239         [ +  + ]:            466 :     if (cm->cd[co].flags & PSEUDO)   /* also pseudocolors (BOS etc) */
 4023                           240                 :            260 :         return -1;
                                241                 :                : 
                                242                 :                :     /*
                                243                 :                :      * If the color appears anywhere in the high colormap, treat its number of
                                244                 :                :      * members as uncertain.  In principle we could determine all the specific
                                245                 :                :      * chrs corresponding to each such entry, but it would be expensive
                                246                 :                :      * (particularly if character class tests are required) and it doesn't
                                247                 :                :      * seem worth it.
                                248                 :                :      */
 2778                           249         [ -  + ]:            206 :     if (cm->cd[co].nuchrs != 0)
 2778 tgl@sss.pgh.pa.us         250                 :UBC           0 :         return -1;
                                251                 :                : 
                                252                 :                :     /* OK, return the known number of member chrs */
 2778 tgl@sss.pgh.pa.us         253                 :CBC         206 :     return cm->cd[co].nschrs;
                                254                 :                : }
                                255                 :                : 
                                256                 :                : /*
                                257                 :                :  * Write array of member chrs of color number "co" into chars[],
                                258                 :                :  * whose length chars_len must be at least as long as indicated by
                                259                 :                :  * pg_reg_getnumcharacters(), else not all chars will be returned.
                                260                 :                :  *
                                261                 :                :  * Fetching the members of WHITE, RAINBOW, or a pseudocolor is not supported.
                                262                 :                :  *
                                263                 :                :  * Caution: this is a relatively expensive operation.
                                264                 :                :  */
                                265                 :                : void
 4023                           266                 :            206 : pg_reg_getcharacters(const regex_t *regex, int co,
                                267                 :                :                      pg_wchar *chars, int chars_len)
                                268                 :                : {
                                269                 :                :     struct colormap *cm;
                                270                 :                :     chr         c;
                                271                 :                : 
                                272   [ +  -  -  + ]:            206 :     assert(regex != NULL && regex->re_magic == REMAGIC);
                                273                 :            206 :     cm = &((struct guts *) regex->re_guts)->cmap;
                                274                 :                : 
                                275   [ +  -  +  -  :            206 :     if (co <= 0 || co > cm->max || chars_len <= 0)
                                              -  + ]
 4023 tgl@sss.pgh.pa.us         276                 :UBC           0 :         return;
 4023 tgl@sss.pgh.pa.us         277         [ -  + ]:CBC         206 :     if (cm->cd[co].flags & PSEUDO)
 4023 tgl@sss.pgh.pa.us         278                 :UBC           0 :         return;
                                279                 :                : 
                                280                 :                :     /*
                                281                 :                :      * We need only examine the low character map; there should not be any
                                282                 :                :      * matching entries in the high map.
                                283                 :                :      */
 2778 tgl@sss.pgh.pa.us         284         [ +  - ]:CBC       20208 :     for (c = CHR_MIN; c <= MAX_SIMPLE_CHR; c++)
                                285                 :                :     {
                                286         [ +  + ]:          20208 :         if (cm->locolormap[c - CHR_MIN] == co)
                                287                 :                :         {
                                288                 :            785 :             *chars++ = c;
                                289         [ +  + ]:            785 :             if (--chars_len == 0)
                                290                 :            206 :                 break;
                                291                 :                :         }
                                292                 :                :     }
                                293                 :                : }
        

Generated by: LCOV version 2.1-beta2-3-g6141622