LCOV - differential code coverage report
Current view: top level - src/backend/regex - regprefix.c (source / functions) Coverage Total Hit UBC CBC
Current: Differential Code Coverage HEAD vs 15 Lines: 86.6 % 82 71 11 71
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 2 2 2
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 (240..) days: 86.6 % 82 71 11 71
Legend: Lines: hit not hit Function coverage date bins:
(240..) days: 100.0 % 2 2 2

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*-------------------------------------------------------------------------
                                  2                 :  *
                                  3                 :  * regprefix.c
                                  4                 :  *    Extract a common prefix, if any, from a compiled regex.
                                  5                 :  *
                                  6                 :  *
                                  7                 :  * Portions Copyright (c) 2012-2023, PostgreSQL Global Development Group
                                  8                 :  * Portions Copyright (c) 1998, 1999 Henry Spencer
                                  9                 :  *
                                 10                 :  * IDENTIFICATION
                                 11                 :  *    src/backend/regex/regprefix.c
                                 12                 :  *
                                 13                 :  *-------------------------------------------------------------------------
                                 14                 :  */
                                 15                 : 
                                 16                 : #include "regex/regguts.h"
                                 17                 : 
                                 18                 : 
                                 19                 : /*
                                 20                 :  * forward declarations
                                 21                 :  */
                                 22                 : static int  findprefix(struct cnfa *cnfa, struct colormap *cm,
                                 23                 :                        chr *string, size_t *slength);
                                 24                 : 
                                 25                 : 
                                 26                 : /*
                                 27                 :  * pg_regprefix - get common prefix for regular expression
                                 28                 :  *
                                 29                 :  * Returns one of:
                                 30                 :  *  REG_NOMATCH: there is no common prefix of strings matching the regex
                                 31                 :  *  REG_PREFIX: there is a common prefix of strings matching the regex
                                 32                 :  *  REG_EXACT: all strings satisfying the regex must match the same string
                                 33                 :  *  or a REG_XXX error code
                                 34                 :  *
                                 35                 :  * In the non-failure cases, *string is set to a palloc'd string containing
                                 36                 :  * the common prefix or exact value, of length *slength (measured in chrs
                                 37                 :  * not bytes!).
                                 38                 :  *
                                 39                 :  * This function does not analyze all complex cases (such as lookaround
                                 40                 :  * constraints) exactly.  Therefore it is possible that some strings matching
                                 41                 :  * the reported prefix or exact-match string do not satisfy the regex.  But
                                 42                 :  * it should never be the case that a string satisfying the regex does not
                                 43                 :  * match the reported prefix or exact-match string.
                                 44                 :  */
                                 45                 : int
 3925 tgl                        46 CBC        6449 : pg_regprefix(regex_t *re,
                                 47                 :              chr **string,
                                 48                 :              size_t *slength)
                                 49                 : {
                                 50                 :     struct guts *g;
                                 51                 :     struct cnfa *cnfa;
                                 52                 :     int         st;
                                 53                 : 
                                 54                 :     /* sanity checks */
                                 55            6449 :     if (string == NULL || slength == NULL)
 3925 tgl                        56 UBC           0 :         return REG_INVARG;
 3925 tgl                        57 CBC        6449 :     *string = NULL;             /* initialize for failure cases */
                                 58            6449 :     *slength = 0;
                                 59            6449 :     if (re == NULL || re->re_magic != REMAGIC)
 3925 tgl                        60 UBC           0 :         return REG_INVARG;
 3925 tgl                        61 CBC        6449 :     if (re->re_csize != sizeof(chr))
 3925 tgl                        62 UBC           0 :         return REG_MIXED;
                                 63                 : 
                                 64                 :     /* Initialize locale-dependent support */
 3925 tgl                        65 CBC        6449 :     pg_set_regex_collation(re->re_collation);
                                 66                 : 
                                 67                 :     /* setup */
                                 68            6449 :     g = (struct guts *) re->re_guts;
                                 69            6449 :     if (g->info & REG_UIMPOSSIBLE)
                                 70               1 :         return REG_NOMATCH;
                                 71                 : 
                                 72                 :     /*
                                 73                 :      * This implementation considers only the search NFA for the topmost regex
                                 74                 :      * tree node.  Therefore, constraints such as backrefs are not fully
                                 75                 :      * applied, which is allowed per the function's API spec.
                                 76                 :      */
                                 77            6448 :     assert(g->tree != NULL);
                                 78            6448 :     cnfa = &g->tree->cnfa;
                                 79                 : 
                                 80                 :     /* matchall NFAs never have a fixed prefix */
  778                            81            6448 :     if (cnfa->flags & MATCHALL)
                                 82               6 :         return REG_NOMATCH;
                                 83                 : 
                                 84                 :     /*
                                 85                 :      * Since a correct NFA should never contain any exit-free loops, it should
                                 86                 :      * not be possible for our traversal to return to a previously visited NFA
                                 87                 :      * state.  Hence we need at most nstates chrs in the output string.
                                 88                 :      */
 3925                            89            6442 :     *string = (chr *) MALLOC(cnfa->nstates * sizeof(chr));
                                 90            6442 :     if (*string == NULL)
 3925 tgl                        91 UBC           0 :         return REG_ESPACE;
                                 92                 : 
                                 93                 :     /* do it */
 3925 tgl                        94 CBC        6442 :     st = findprefix(cnfa, &g->cmap, *string, slength);
                                 95                 : 
                                 96            6442 :     assert(*slength <= cnfa->nstates);
                                 97                 : 
                                 98                 :     /* clean up */
                                 99            6442 :     if (st != REG_PREFIX && st != REG_EXACT)
                                100                 :     {
                                101             357 :         FREE(*string);
                                102             357 :         *string = NULL;
                                103             357 :         *slength = 0;
                                104                 :     }
                                105                 : 
                                106            6442 :     return st;
                                107                 : }
                                108                 : 
                                109                 : /*
                                110                 :  * findprefix - extract common prefix from cNFA
                                111                 :  *
                                112                 :  * Results are returned into the preallocated chr array string[], with
                                113                 :  * *slength (which must be preset to zero) incremented for each chr.
                                114                 :  */
                                115                 : static int                      /* regprefix return code */
 2118                           116            6442 : findprefix(struct cnfa *cnfa,
                                117                 :            struct colormap *cm,
                                118                 :            chr *string,
                                119                 :            size_t *slength)
                                120                 : {
                                121                 :     int         st;
                                122                 :     int         nextst;
                                123                 :     color       thiscolor;
                                124                 :     chr         c;
                                125                 :     struct carc *ca;
                                126                 : 
                                127                 :     /*
                                128                 :      * The "pre" state must have only BOS/BOL outarcs, else pattern isn't
                                129                 :      * anchored left.  If we have both BOS and BOL, they must go to the same
                                130                 :      * next state.
                                131                 :      */
 3925                           132            6442 :     st = cnfa->pre;
                                133            6442 :     nextst = -1;
                                134           12621 :     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
                                135                 :     {
                                136            6448 :         if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
                                137                 :         {
                                138            6185 :             if (nextst == -1)
                                139            6179 :                 nextst = ca->to;
                                140               6 :             else if (nextst != ca->to)
                                141               6 :                 return REG_NOMATCH;
                                142                 :         }
                                143                 :         else
                                144             263 :             return REG_NOMATCH;
                                145                 :     }
                                146            6173 :     if (nextst == -1)
 3925 tgl                       147 UBC           0 :         return REG_NOMATCH;
                                148                 : 
                                149                 :     /*
                                150                 :      * Scan through successive states, stopping as soon as we find one with
                                151                 :      * more than one acceptable transition character (either multiple colors
                                152                 :      * on out-arcs, or a color with more than one member chr).
                                153                 :      *
                                154                 :      * We could find a state with multiple out-arcs that are all labeled with
                                155                 :      * the same singleton color; this comes from patterns like "^ab(cde|cxy)".
                                156                 :      * In that case we add the chr "c" to the output string but then exit the
                                157                 :      * loop with nextst == -1.  This leaves a little bit on the table: if the
                                158                 :      * pattern is like "^ab(cde|cdy)", we won't notice that "d" could be added
                                159                 :      * to the prefix.  But chasing multiple parallel state chains doesn't seem
                                160                 :      * worth the trouble.
                                161                 :      */
                                162                 :     do
                                163                 :     {
 3925 tgl                       164 CBC       79814 :         st = nextst;
                                165           79814 :         nextst = -1;
                                166           79814 :         thiscolor = COLORLESS;
                                167          153535 :         for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
                                168                 :         {
                                169                 :             /* We can ignore BOS/BOL arcs */
                                170           79835 :             if (ca->co == cnfa->bos[0] || ca->co == cnfa->bos[1])
 3925 tgl                       171 UBC           0 :                 continue;
                                172                 : 
                                173                 :             /*
                                174                 :              * ... but EOS/EOL arcs terminate the search, as do RAINBOW arcs
                                175                 :              * and LACONs
                                176                 :              */
 2729 tgl                       177 CBC       79835 :             if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1] ||
  778                           178           74376 :                 ca->co == RAINBOW || ca->co >= cnfa->ncolors)
                                179                 :             {
 3925                           180            6105 :                 thiscolor = COLORLESS;
                                181            6105 :                 break;
                                182                 :             }
                                183           73730 :             if (thiscolor == COLORLESS)
                                184                 :             {
                                185                 :                 /* First plain outarc */
                                186           73715 :                 thiscolor = ca->co;
                                187           73715 :                 nextst = ca->to;
                                188                 :             }
                                189              15 :             else if (thiscolor == ca->co)
                                190                 :             {
                                191                 :                 /* Another plain outarc for same color */
                                192               6 :                 nextst = -1;
                                193                 :             }
                                194                 :             else
                                195                 :             {
                                196                 :                 /* More than one plain outarc color terminates the search */
                                197               9 :                 thiscolor = COLORLESS;
                                198               9 :                 break;
                                199                 :             }
                                200                 :         }
                                201                 :         /* Done if we didn't find exactly one color on plain outarcs */
                                202           79814 :         if (thiscolor == COLORLESS)
                                203            6114 :             break;
                                204                 :         /* The color must be a singleton */
 2407                           205           73700 :         if (cm->cd[thiscolor].nschrs != 1)
                                206              53 :             break;
                                207                 :         /* Must not have any high-color-map entries */
                                208           73647 :         if (cm->cd[thiscolor].nuchrs != 0)
 3925 tgl                       209 UBC           0 :             break;
                                210                 : 
                                211                 :         /*
                                212                 :          * Identify the color's sole member chr and add it to the prefix
                                213                 :          * string.  In general the colormap data structure doesn't provide a
                                214                 :          * way to find color member chrs, except by trying GETCOLOR() on each
                                215                 :          * possible chr value, which won't do at all.  However, for the cases
                                216                 :          * we care about it should be sufficient to test the "firstchr" value,
                                217                 :          * that is the first chr ever added to the color.  There are cases
                                218                 :          * where this might no longer be a member of the color (so we do need
                                219                 :          * to test), but none of them are likely to arise for a character that
                                220                 :          * is a member of a common prefix.  If we do hit such a corner case,
                                221                 :          * we just fall out without adding anything to the prefix string.
                                222                 :          */
 3925 tgl                       223 CBC       73647 :         c = cm->cd[thiscolor].firstchr;
                                224           73647 :         if (GETCOLOR(cm, c) != thiscolor)
 3925 tgl                       225 UBC           0 :             break;
                                226                 : 
 3925 tgl                       227 CBC       73647 :         string[(*slength)++] = c;
                                228                 : 
                                229                 :         /* Advance to next state, but only if we have a unique next state */
                                230           73647 :     } while (nextst != -1);
                                231                 : 
                                232                 :     /*
                                233                 :      * If we ended at a state that only has EOS/EOL outarcs leading to the
                                234                 :      * "post" state, then we have an exact-match string.  Note this is true
                                235                 :      * even if the string is of zero length.
                                236                 :      */
                                237            6173 :     nextst = -1;
                                238           11632 :     for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
                                239                 :     {
                                240            6173 :         if (ca->co == cnfa->eos[0] || ca->co == cnfa->eos[1])
                                241                 :         {
                                242            5459 :             if (nextst == -1)
                                243            5459 :                 nextst = ca->to;
 3925 tgl                       244 UBC           0 :             else if (nextst != ca->to)
                                245                 :             {
                                246               0 :                 nextst = -1;
                                247               0 :                 break;
                                248                 :             }
                                249                 :         }
                                250                 :         else
                                251                 :         {
 3925 tgl                       252 CBC         714 :             nextst = -1;
                                253             714 :             break;
                                254                 :         }
                                255                 :     }
                                256            6173 :     if (nextst == cnfa->post)
                                257            5459 :         return REG_EXACT;
                                258                 : 
                                259                 :     /*
                                260                 :      * Otherwise, if we were unable to identify any prefix characters, say
                                261                 :      * NOMATCH --- the pattern is anchored left, but doesn't specify any
                                262                 :      * particular first character.
                                263                 :      */
                                264             714 :     if (*slength > 0)
                                265             626 :         return REG_PREFIX;
                                266                 : 
                                267              88 :     return REG_NOMATCH;
                                268                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a