LCOV - differential code coverage report
Current view: top level - src/backend/regex - rege_dfa.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 90.0 % 462 416 5 5 36 2 55 1 358 8 52 1
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 13 13 3 1 9 3
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 1 1 1
Legend: Lines: hit not hit (240..) days: 90.0 % 461 415 5 5 36 2 55 358 8 52
Function coverage date bins:
(240..) days: 81.2 % 16 13 3 1 9 3

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*
                                  2                 :  * DFA routines
                                  3                 :  * This file is #included by regexec.c.
                                  4                 :  *
                                  5                 :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
                                  6                 :  *
                                  7                 :  * Development of this software was funded, in part, by Cray Research Inc.,
                                  8                 :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
                                  9                 :  * Corporation, none of whom are responsible for the results.  The author
                                 10                 :  * thanks all of them.
                                 11                 :  *
                                 12                 :  * Redistribution and use in source and binary forms -- with or without
                                 13                 :  * modification -- are permitted for any purpose, provided that
                                 14                 :  * redistributions in source form retain this entire copyright notice and
                                 15                 :  * indicate the origin and nature of any modifications.
                                 16                 :  *
                                 17                 :  * I'd appreciate being given credit for this package in the documentation
                                 18                 :  * of software which uses it, but that is not a requirement.
                                 19                 :  *
                                 20                 :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
                                 21                 :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
                                 22                 :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
                                 23                 :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
                                 24                 :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
                                 25                 :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
                                 26                 :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                                 27                 :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
                                 28                 :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
                                 29                 :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                                 30                 :  *
                                 31                 :  * src/backend/regex/rege_dfa.c
                                 32                 :  *
                                 33                 :  */
                                 34                 : 
                                 35                 : /*
                                 36                 :  * longest - longest-preferred matching engine
                                 37                 :  *
                                 38                 :  * On success, returns match endpoint address.  Returns NULL on no match.
                                 39                 :  * Internal errors also return NULL, with v->err set.
                                 40                 :  */
                                 41                 : static chr *
 2118 tgl                        42 CBC      468763 : longest(struct vars *v,
                                 43                 :         struct dfa *d,
                                 44                 :         chr *start,             /* where the match should start */
                                 45                 :         chr *stop,              /* match must end at or before here */
                                 46                 :         int *hitstopp)          /* record whether hit v->stop, if non-NULL */
                                 47                 : {
                                 48                 :     chr        *cp;
 7188 bruce                      49          468763 :     chr        *realstop = (stop == v->stop) ? stop : stop + 1;
                                 50                 :     color       co;
                                 51                 :     struct sset *css;
                                 52                 :     struct sset *ss;
                                 53                 :     chr        *post;
                                 54                 :     int         i;
 7368 tgl                        55          468763 :     struct colormap *cm = d->cm;
                                 56                 : 
                                 57                 :     /* prevent "uninitialized variable" warnings */
 2746                            58          468763 :     if (hitstopp != NULL)
                                 59          451848 :         *hitstopp = 0;
                                 60                 : 
                                 61                 :     /* if this is a backref to a known string, just match against that */
  768                            62          468763 :     if (d->backno >= 0)
                                 63                 :     {
                                 64             793 :         assert((size_t) d->backno < v->nmatch);
                                 65             793 :         if (v->pmatch[d->backno].rm_so >= 0)
                                 66                 :         {
                                 67             616 :             cp = dfa_backref(v, d, start, start, stop, false);
                                 68             616 :             if (cp == v->stop && stop == v->stop && hitstopp != NULL)
  768 tgl                        69 UBC           0 :                 *hitstopp = 1;
  768 tgl                        70 CBC         616 :             return cp;
                                 71                 :         }
                                 72                 :     }
                                 73                 : 
                                 74                 :     /* fast path for matchall NFAs */
  778                            75          468147 :     if (d->cnfa->flags & MATCHALL)
                                 76                 :     {
                                 77            2495 :         size_t      nchr = stop - start;
                                 78            2495 :         size_t      maxmatchall = d->cnfa->maxmatchall;
                                 79                 : 
                                 80            2495 :         if (nchr < d->cnfa->minmatchall)
                                 81             165 :             return NULL;
                                 82            2330 :         if (maxmatchall == DUPINF)
                                 83                 :         {
                                 84            1488 :             if (stop == v->stop && hitstopp != NULL)
                                 85               5 :                 *hitstopp = 1;
                                 86                 :         }
                                 87                 :         else
                                 88                 :         {
                                 89             842 :             if (stop == v->stop && nchr <= maxmatchall + 1 && hitstopp != NULL)
                                 90              84 :                 *hitstopp = 1;
                                 91             842 :             if (nchr > maxmatchall)
                                 92             572 :                 return start + maxmatchall;
                                 93                 :         }
                                 94            1758 :         return stop;
                                 95                 :     }
                                 96                 : 
                                 97                 :     /* initialize */
 7368                            98          465652 :     css = initialize(v, d, start);
 2746                            99          465652 :     if (css == NULL)
 2746 tgl                       100 UBC           0 :         return NULL;
 7368 tgl                       101 CBC      465652 :     cp = start;
                                102                 : 
                                103                 :     /* startup */
                                104                 :     FDEBUG(("+++ startup +++\n"));
 7188 bruce                     105          465652 :     if (cp == v->start)
                                106                 :     {
                                107            1951 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
                                108                 :         FDEBUG(("color %ld\n", (long) co));
                                109                 :     }
                                110                 :     else
                                111                 :     {
 7368 tgl                       112          463701 :         co = GETCOLOR(cm, *(cp - 1));
                                113                 :         FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
                                114                 :     }
                                115          465652 :     css = miss(v, d, css, co, cp, start);
                                116          465652 :     if (css == NULL)
                                117             216 :         return NULL;
                                118          465436 :     css->lastseen = cp;
                                119                 : 
                                120                 :     /*
                                121                 :      * This is the main text-scanning loop.  It seems worth having two copies
                                122                 :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
                                123                 :      * builds, when you're not actively tracing.
                                124                 :      */
                                125                 : #ifdef REG_DEBUG
                                126                 :     if (v->eflags & REG_FTRACE)
                                127                 :     {
                                128                 :         while (cp < realstop)
                                129                 :         {
                                130                 :             FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
                                131                 :             co = GETCOLOR(cm, *cp);
                                132                 :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
                                133                 :             ss = css->outs[co];
                                134                 :             if (ss == NULL)
                                135                 :             {
                                136                 :                 ss = miss(v, d, css, co, cp + 1, start);
                                137                 :                 if (ss == NULL)
                                138                 :                     break;      /* NOTE BREAK OUT */
                                139                 :             }
                                140                 :             cp++;
                                141                 :             ss->lastseen = cp;
                                142                 :             css = ss;
                                143                 :         }
                                144                 :     }
                                145                 :     else
                                146                 : #endif
                                147                 :     {
 7188 bruce                     148         5922947 :         while (cp < realstop)
                                149                 :         {
 7368 tgl                       150         5910485 :             co = GETCOLOR(cm, *cp);
                                151         5910485 :             ss = css->outs[co];
 7188 bruce                     152         5910485 :             if (ss == NULL)
                                153                 :             {
                                154         1483789 :                 ss = miss(v, d, css, co, cp + 1, start);
 7368 tgl                       155         1483789 :                 if (ss == NULL)
 7188 bruce                     156          452974 :                     break;      /* NOTE BREAK OUT */
                                157                 :             }
 7368 tgl                       158         5457511 :             cp++;
                                159         5457511 :             ss->lastseen = cp;
                                160         5457511 :             css = ss;
                                161                 :         }
                                162                 :     }
                                163                 : 
 2746                           164          465436 :     if (ISERR())
 2746 tgl                       165 UBC           0 :         return NULL;
                                166                 : 
                                167                 :     /* shutdown */
                                168                 :     FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
 7188 bruce                     169 CBC      465436 :     if (cp == v->stop && stop == v->stop)
                                170                 :     {
 7368 tgl                       171            6546 :         if (hitstopp != NULL)
                                172            3091 :             *hitstopp = 1;
 7188 bruce                     173            6546 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
                                174                 :         FDEBUG(("color %ld\n", (long) co));
 7368 tgl                       175            6546 :         ss = miss(v, d, css, co, cp, start);
 2746                           176            6546 :         if (ISERR())
 2746 tgl                       177 UBC           0 :             return NULL;
                                178                 :         /* special case:  match ended at eol? */
 7188 bruce                     179 CBC        6546 :         if (ss != NULL && (ss->flags & POSTSTATE))
 7368 tgl                       180            3501 :             return cp;
                                181            3045 :         else if (ss != NULL)
 7368 tgl                       182 UBC           0 :             ss->lastseen = cp;   /* to be tidy */
                                183                 :     }
                                184                 : 
                                185                 :     /* find last match, if any */
 7368 tgl                       186 CBC      461935 :     post = d->lastpost;
                                187         2393081 :     for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
 7188 bruce                     188         1931146 :         if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
                                189            7210 :             (post == NULL || post < ss->lastseen))
 7368 tgl                       190          466755 :             post = ss->lastseen;
 7188 bruce                     191          461935 :     if (post != NULL)           /* found one */
 7368 tgl                       192          459927 :         return post - 1;
                                193                 : 
                                194            2008 :     return NULL;
                                195                 : }
                                196                 : 
                                197                 : /*
                                198                 :  * shortest - shortest-preferred matching engine
                                199                 :  *
                                200                 :  * On success, returns match endpoint address.  Returns NULL on no match.
                                201                 :  * Internal errors also return NULL, with v->err set.
                                202                 :  */
                                203                 : static chr *
 2118                           204          902023 : shortest(struct vars *v,
                                205                 :          struct dfa *d,
                                206                 :          chr *start,            /* where the match should start */
                                207                 :          chr *min,              /* match must end at or after here */
                                208                 :          chr *max,              /* match must end at or before here */
                                209                 :          chr **coldp,           /* store coldstart pointer here, if non-NULL */
                                210                 :          int *hitstopp)         /* record whether hit v->stop, if non-NULL */
                                211                 : {
                                212                 :     chr        *cp;
 7188 bruce                     213          902023 :     chr        *realmin = (min == v->stop) ? min : min + 1;
                                214          902023 :     chr        *realmax = (max == v->stop) ? max : max + 1;
                                215                 :     color       co;
                                216                 :     struct sset *css;
                                217                 :     struct sset *ss;
 7368 tgl                       218          902023 :     struct colormap *cm = d->cm;
                                219                 : 
                                220                 :     /* prevent "uninitialized variable" warnings */
 2746                           221          902023 :     if (coldp != NULL)
                                222          901194 :         *coldp = NULL;
                                223          902023 :     if (hitstopp != NULL)
                                224             158 :         *hitstopp = 0;
                                225                 : 
                                226                 :     /* if this is a backref to a known string, just match against that */
  768                           227          902023 :     if (d->backno >= 0)
                                228                 :     {
  768 tgl                       229 UBC           0 :         assert((size_t) d->backno < v->nmatch);
                                230               0 :         if (v->pmatch[d->backno].rm_so >= 0)
                                231                 :         {
                                232               0 :             cp = dfa_backref(v, d, start, min, max, true);
                                233               0 :             if (cp != NULL && coldp != NULL)
                                234               0 :                 *coldp = start;
                                235                 :             /* there is no case where we should set *hitstopp */
                                236               0 :             return cp;
                                237                 :         }
                                238                 :     }
                                239                 : 
                                240                 :     /* fast path for matchall NFAs */
  778 tgl                       241 CBC      902023 :     if (d->cnfa->flags & MATCHALL)
                                242                 :     {
                                243             986 :         size_t      nchr = min - start;
                                244                 : 
                                245             986 :         if (d->cnfa->maxmatchall != DUPINF &&
                                246              12 :             nchr > d->cnfa->maxmatchall)
  778 tgl                       247 UBC           0 :             return NULL;
  778 tgl                       248 CBC         986 :         if ((max - start) < d->cnfa->minmatchall)
                                249               9 :             return NULL;
                                250             977 :         if (nchr < d->cnfa->minmatchall)
                                251              65 :             min = start + d->cnfa->minmatchall;
                                252             977 :         if (coldp != NULL)
                                253             447 :             *coldp = start;
                                254                 :         /* there is no case where we should set *hitstopp */
                                255             977 :         return min;
                                256                 :     }
                                257                 : 
                                258                 :     /* initialize */
 7368                           259          901037 :     css = initialize(v, d, start);
 2746                           260          901037 :     if (css == NULL)
 2746 tgl                       261 UBC           0 :         return NULL;
 7368 tgl                       262 CBC      901037 :     cp = start;
                                263                 : 
                                264                 :     /* startup */
                                265                 :     FDEBUG(("--- startup ---\n"));
 7188 bruce                     266          901037 :     if (cp == v->start)
                                267                 :     {
                                268          453417 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
                                269                 :         FDEBUG(("color %ld\n", (long) co));
                                270                 :     }
                                271                 :     else
                                272                 :     {
 7368 tgl                       273          447620 :         co = GETCOLOR(cm, *(cp - 1));
                                274                 :         FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
                                275                 :     }
                                276          901037 :     css = miss(v, d, css, co, cp, start);
                                277          901037 :     if (css == NULL)
                                278               6 :         return NULL;
                                279          901031 :     css->lastseen = cp;
                                280          901031 :     ss = css;
                                281                 : 
                                282                 :     /*
                                283                 :      * This is the main text-scanning loop.  It seems worth having two copies
                                284                 :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
                                285                 :      * builds, when you're not actively tracing.
                                286                 :      */
                                287                 : #ifdef REG_DEBUG
                                288                 :     if (v->eflags & REG_FTRACE)
                                289                 :     {
                                290                 :         while (cp < realmax)
                                291                 :         {
                                292                 :             FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
                                293                 :             co = GETCOLOR(cm, *cp);
                                294                 :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
                                295                 :             ss = css->outs[co];
                                296                 :             if (ss == NULL)
                                297                 :             {
                                298                 :                 ss = miss(v, d, css, co, cp + 1, start);
                                299                 :                 if (ss == NULL)
                                300                 :                     break;      /* NOTE BREAK OUT */
                                301                 :             }
                                302                 :             cp++;
                                303                 :             ss->lastseen = cp;
                                304                 :             css = ss;
                                305                 :             if ((ss->flags & POSTSTATE) && cp >= realmin)
                                306                 :                 break;          /* NOTE BREAK OUT */
                                307                 :         }
                                308                 :     }
                                309                 :     else
                                310                 : #endif
                                311                 :     {
 7188 bruce                     312        18093658 :         while (cp < realmax)
                                313                 :         {
 7368 tgl                       314        17850289 :             co = GETCOLOR(cm, *cp);
                                315        17850289 :             ss = css->outs[co];
 7188 bruce                     316        17850289 :             if (ss == NULL)
                                317                 :             {
                                318         3123250 :                 ss = miss(v, d, css, co, cp + 1, start);
 7368 tgl                       319         3123250 :                 if (ss == NULL)
 7188 bruce                     320           96412 :                     break;      /* NOTE BREAK OUT */
                                321                 :             }
 7368 tgl                       322        17753877 :             cp++;
                                323        17753877 :             ss->lastseen = cp;
                                324        17753877 :             css = ss;
 7188 bruce                     325        17753877 :             if ((ss->flags & POSTSTATE) && cp >= realmin)
                                326          561250 :                 break;          /* NOTE BREAK OUT */
                                327                 :         }
                                328                 :     }
                                329                 : 
 7368 tgl                       330          901031 :     if (ss == NULL)
                                331           96412 :         return NULL;
                                332                 : 
 6385 bruce                     333          804619 :     if (coldp != NULL)          /* report last no-progress state set, if any */
 7368 tgl                       334          804352 :         *coldp = lastcold(v, d);
                                335                 : 
 7188 bruce                     336          804619 :     if ((ss->flags & POSTSTATE) && cp > min)
                                337                 :     {
 7368 tgl                       338          561234 :         assert(cp >= realmin);
                                339          561234 :         cp--;
                                340                 :     }
 7188 bruce                     341          243385 :     else if (cp == v->stop && max == v->stop)
                                342                 :     {
                                343          243385 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
                                344                 :         FDEBUG(("color %ld\n", (long) co));
 7368 tgl                       345          243385 :         ss = miss(v, d, css, co, cp, start);
                                346                 :         /* match might have ended at eol */
 7188 bruce                     347          243385 :         if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
 7368 tgl                       348               6 :             *hitstopp = 1;
                                349                 :     }
                                350                 : 
 7188 bruce                     351          804619 :     if (ss == NULL || !(ss->flags & POSTSTATE))
 7368 tgl                       352          234384 :         return NULL;
                                353                 : 
                                354          570235 :     return cp;
                                355                 : }
                                356                 : 
                                357                 : /*
                                358                 :  * matchuntil - incremental matching engine
                                359                 :  *
                                360                 :  * This is meant for use with a search-style NFA (that is, the pattern is
                                361                 :  * known to act as though it had a leading .*).  We determine whether a
                                362                 :  * match exists starting at v->start and ending at probe.  Multiple calls
                                363                 :  * require only O(N) time not O(N^2) so long as the probe values are
                                364                 :  * nondecreasing.  *lastcss and *lastcp must be initialized to NULL before
                                365                 :  * starting a series of calls.
                                366                 :  *
                                367                 :  * Returns 1 if a match exists, 0 if not.
                                368                 :  * Internal errors also return 0, with v->err set.
                                369                 :  */
                                370                 : static int
 2118                           371              60 : matchuntil(struct vars *v,
                                372                 :            struct dfa *d,
                                373                 :            chr *probe,          /* we want to know if a match ends here */
                                374                 :            struct sset **lastcss,   /* state storage across calls */
                                375                 :            chr **lastcp)        /* state storage across calls */
                                376                 : {
 2718                           377              60 :     chr        *cp = *lastcp;
                                378                 :     color       co;
                                379              60 :     struct sset *css = *lastcss;
                                380                 :     struct sset *ss;
                                381              60 :     struct colormap *cm = d->cm;
                                382                 : 
                                383                 :     /* fast path for matchall NFAs */
  778                           384              60 :     if (d->cnfa->flags & MATCHALL)
                                385                 :     {
                                386              18 :         size_t      nchr = probe - v->start;
                                387                 : 
                                388              18 :         if (nchr < d->cnfa->minmatchall)
                                389               9 :             return 0;
                                390                 :         /* maxmatchall will always be infinity, cf. makesearch() */
  590                           391               9 :         assert(d->cnfa->maxmatchall == DUPINF);
  778                           392               9 :         return 1;
                                393                 :     }
                                394                 : 
                                395                 :     /* initialize and startup, or restart, if necessary */
 2718                           396              42 :     if (cp == NULL || cp > probe)
                                397                 :     {
                                398              12 :         cp = v->start;
                                399              12 :         css = initialize(v, d, cp);
                                400              12 :         if (css == NULL)
 2718 tgl                       401 UBC           0 :             return 0;
                                402                 : 
                                403                 :         FDEBUG((">>> startup >>>\n"));
 2718 tgl                       404 CBC          12 :         co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
                                405                 :         FDEBUG(("color %ld\n", (long) co));
                                406                 : 
                                407              12 :         css = miss(v, d, css, co, cp, v->start);
                                408              12 :         if (css == NULL)
 2718 tgl                       409 UBC           0 :             return 0;
 2718 tgl                       410 CBC          12 :         css->lastseen = cp;
                                411                 :     }
                                412              30 :     else if (css == NULL)
                                413                 :     {
                                414                 :         /* we previously found that no match is possible beyond *lastcp */
 2718 tgl                       415 UBC           0 :         return 0;
                                416                 :     }
 2718 tgl                       417 CBC          42 :     ss = css;
                                418                 : 
                                419                 :     /*
                                420                 :      * This is the main text-scanning loop.  It seems worth having two copies
                                421                 :      * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
                                422                 :      * builds, when you're not actively tracing.
                                423                 :      */
                                424                 : #ifdef REG_DEBUG
                                425                 :     if (v->eflags & REG_FTRACE)
                                426                 :     {
                                427                 :         while (cp < probe)
                                428                 :         {
                                429                 :             FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
                                430                 :             co = GETCOLOR(cm, *cp);
                                431                 :             FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
                                432                 :             ss = css->outs[co];
                                433                 :             if (ss == NULL)
                                434                 :             {
                                435                 :                 ss = miss(v, d, css, co, cp + 1, v->start);
                                436                 :                 if (ss == NULL)
                                437                 :                     break;      /* NOTE BREAK OUT */
                                438                 :             }
                                439                 :             cp++;
                                440                 :             ss->lastseen = cp;
                                441                 :             css = ss;
                                442                 :         }
                                443                 :     }
                                444                 :     else
                                445                 : #endif
                                446                 :     {
                                447              90 :         while (cp < probe)
                                448                 :         {
                                449              48 :             co = GETCOLOR(cm, *cp);
                                450              48 :             ss = css->outs[co];
                                451              48 :             if (ss == NULL)
                                452                 :             {
                                453               6 :                 ss = miss(v, d, css, co, cp + 1, v->start);
                                454               6 :                 if (ss == NULL)
 2718 tgl                       455 UBC           0 :                     break;      /* NOTE BREAK OUT */
                                456                 :             }
 2718 tgl                       457 CBC          48 :             cp++;
                                458              48 :             ss->lastseen = cp;
                                459              48 :             css = ss;
                                460                 :         }
                                461                 :     }
                                462                 : 
                                463              42 :     *lastcss = ss;
                                464              42 :     *lastcp = cp;
                                465                 : 
                                466              42 :     if (ss == NULL)
 2718 tgl                       467 UBC           0 :         return 0;               /* impossible match, or internal error */
                                468                 : 
                                469                 :     /* We need to process one more chr, or the EOS symbol, to check match */
 2718 tgl                       470 CBC          42 :     if (cp < v->stop)
                                471                 :     {
                                472                 :         FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
                                473              42 :         co = GETCOLOR(cm, *cp);
                                474                 :         FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
                                475              42 :         ss = css->outs[co];
                                476              42 :         if (ss == NULL)
                                477              27 :             ss = miss(v, d, css, co, cp + 1, v->start);
                                478                 :     }
                                479                 :     else
                                480                 :     {
 2718 tgl                       481 UBC           0 :         assert(cp == v->stop);
                                482               0 :         co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
                                483                 :         FDEBUG(("color %ld\n", (long) co));
                                484               0 :         ss = miss(v, d, css, co, cp, v->start);
                                485                 :     }
                                486                 : 
 2718 tgl                       487 CBC          42 :     if (ss == NULL || !(ss->flags & POSTSTATE))
                                488              30 :         return 0;
                                489                 : 
                                490              12 :     return 1;
                                491                 : }
                                492                 : 
                                493                 : /*
                                494                 :  * dfa_backref - find best match length for a known backref string
                                495                 :  *
                                496                 :  * When the backref's referent is already available, we can deliver an exact
                                497                 :  * answer with considerably less work than running the backref node's NFA.
                                498                 :  *
                                499                 :  * Return match endpoint for longest or shortest valid repeated match,
                                500                 :  * or NULL if there is no valid match.
                                501                 :  *
                                502                 :  * Should be in sync with cbrdissect(), although that has the different task
                                503                 :  * of checking a match to a predetermined section of the string.
                                504                 :  */
                                505                 : static chr *
  768                           506             616 : dfa_backref(struct vars *v,
                                507                 :             struct dfa *d,
                                508                 :             chr *start,         /* where the match should start */
                                509                 :             chr *min,           /* match must end at or after here */
                                510                 :             chr *max,           /* match must end at or before here */
                                511                 :             bool shortest)
                                512                 : {
                                513             616 :     int         n = d->backno;
                                514             616 :     int         backmin = d->backmin;
                                515             616 :     int         backmax = d->backmax;
                                516                 :     size_t      numreps;
                                517                 :     size_t      minreps;
                                518                 :     size_t      maxreps;
                                519                 :     size_t      brlen;
                                520                 :     chr        *brstring;
                                521                 :     chr        *p;
                                522                 : 
                                523                 :     /* get the backreferenced string (caller should have checked this) */
                                524             616 :     if (v->pmatch[n].rm_so == -1)
  768 tgl                       525 UBC           0 :         return NULL;
  768 tgl                       526 CBC         616 :     brstring = v->start + v->pmatch[n].rm_so;
                                527             616 :     brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
                                528                 : 
                                529                 :     /* special-case zero-length backreference to avoid divide by zero */
                                530             616 :     if (brlen == 0)
                                531                 :     {
                                532                 :         /*
                                533                 :          * matches only a zero-length string, but any number of repetitions
                                534                 :          * can be considered to be present
                                535                 :          */
                                536               1 :         if (min == start && backmin <= backmax)
                                537               1 :             return start;
  768 tgl                       538 UBC           0 :         return NULL;
                                539                 :     }
                                540                 : 
                                541                 :     /*
                                542                 :      * convert min and max into numbers of possible repetitions of the backref
                                543                 :      * string, rounding appropriately
                                544                 :      */
  768 tgl                       545 CBC         615 :     if (min <= start)
                                546             615 :         minreps = 0;
                                547                 :     else
  768 tgl                       548 UBC           0 :         minreps = (min - start - 1) / brlen + 1;
  768 tgl                       549 CBC         615 :     maxreps = (max - start) / brlen;
                                550                 : 
                                551                 :     /* apply bounds, then see if there is any allowed match length */
                                552             615 :     if (minreps < backmin)
                                553             597 :         minreps = backmin;
                                554             615 :     if (backmax != DUPINF && maxreps > backmax)
                                555             297 :         maxreps = backmax;
                                556             615 :     if (maxreps < minreps)
                                557             134 :         return NULL;
                                558                 : 
                                559                 :     /* quick exit if zero-repetitions match is valid and preferred */
                                560             481 :     if (shortest && minreps == 0)
  768 tgl                       561 UBC           0 :         return start;
                                562                 : 
                                563                 :     /* okay, compare the actual string contents */
  768 tgl                       564 CBC         481 :     p = start;
                                565             481 :     numreps = 0;
                                566             570 :     while (numreps < maxreps)
                                567                 :     {
                                568             492 :         if ((*v->g->compare) (brstring, p, brlen) != 0)
                                569             403 :             break;
                                570              89 :         p += brlen;
                                571              89 :         numreps++;
                                572              89 :         if (shortest && numreps >= minreps)
  768 tgl                       573 UBC           0 :             break;
                                574                 :     }
                                575                 : 
  768 tgl                       576 CBC         481 :     if (numreps >= minreps)
                                577              82 :         return p;
                                578             399 :     return NULL;
                                579                 : }
                                580                 : 
                                581                 : /*
                                582                 :  * lastcold - determine last point at which no progress had been made
                                583                 :  */
                                584                 : static chr *                    /* endpoint, or NULL */
 2118                           585          804352 : lastcold(struct vars *v,
                                586                 :          struct dfa *d)
                                587                 : {
                                588                 :     struct sset *ss;
                                589                 :     chr        *nopr;
                                590                 :     int         i;
                                591                 : 
 7368                           592          804352 :     nopr = d->lastnopr;
                                593          804352 :     if (nopr == NULL)
                                594          804350 :         nopr = v->start;
                                595         4282624 :     for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
 7188 bruce                     596         3478272 :         if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
 7368 tgl                       597         1171118 :             nopr = ss->lastseen;
                                598          804352 :     return nopr;
                                599                 : }
                                600                 : 
                                601                 : /*
                                602                 :  * newdfa - set up a fresh DFA
                                603                 :  *
                                604                 :  * Returns NULL (and sets v->err) on failure.
                                605                 :  */
                                606                 : static struct dfa *
 2118                           607         1365075 : newdfa(struct vars *v,
                                608                 :        struct cnfa *cnfa,
                                609                 :        struct colormap *cm,
                                610                 :        struct smalldfa *sml)    /* preallocated space, may be NULL */
                                611                 : {
                                612                 :     struct dfa *d;
 7188 bruce                     613         1365075 :     size_t      nss = cnfa->nstates * 2;
                                614         1365075 :     int         wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
  777 tgl                       615         1365075 :     bool        ismalloced = false;
                                616                 : 
 7368                           617         1365075 :     assert(cnfa != NULL && cnfa->nstates != 0);
                                618                 : 
 7188 bruce                     619         1365075 :     if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
                                620                 :     {
 7368 tgl                       621         1261589 :         assert(wordsper == 1);
 4069                           622         1261589 :         if (sml == NULL)
                                623                 :         {
                                624           11653 :             sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
                                625           11653 :             if (sml == NULL)
                                626                 :             {
 7368 tgl                       627 UBC           0 :                 ERR(REG_ESPACE);
                                628               0 :                 return NULL;
                                629                 :             }
  777 tgl                       630 CBC       11653 :             ismalloced = true;
                                631                 :         }
 4069                           632         1261589 :         d = &sml->dfa;
                                633         1261589 :         d->ssets = sml->ssets;
                                634         1261589 :         d->statesarea = sml->statesarea;
 7368                           635         1261589 :         d->work = &d->statesarea[nss];
 4069                           636         1261589 :         d->outsarea = sml->outsarea;
                                637         1261589 :         d->incarea = sml->incarea;
  777                           638         1261589 :         d->ismalloced = ismalloced;
                                639         1261589 :         d->arraysmalloced = false;   /* not separately allocated, anyway */
                                640                 :     }
                                641                 :     else
                                642                 :     {
 7188 bruce                     643          103486 :         d = (struct dfa *) MALLOC(sizeof(struct dfa));
                                644          103486 :         if (d == NULL)
                                645                 :         {
 7368 tgl                       646 UBC           0 :             ERR(REG_ESPACE);
                                647               0 :             return NULL;
                                648                 :         }
 7188 bruce                     649 CBC      103486 :         d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
                                650          103486 :         d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
                                651                 :                                             sizeof(unsigned));
 7368 tgl                       652          103486 :         d->work = &d->statesarea[nss * wordsper];
 7188 bruce                     653          103486 :         d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
                                654                 :                                               sizeof(struct sset *));
                                655          103486 :         d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
                                656                 :                                             sizeof(struct arcp));
  777 tgl                       657          103486 :         d->ismalloced = true;
                                658          103486 :         d->arraysmalloced = true;
                                659                 :         /* now freedfa() will behave sanely */
 7368                           660          103486 :         if (d->ssets == NULL || d->statesarea == NULL ||
 7188 bruce                     661          103486 :             d->outsarea == NULL || d->incarea == NULL)
                                662                 :         {
 7368 tgl                       663 UBC           0 :             freedfa(d);
                                664               0 :             ERR(REG_ESPACE);
                                665               0 :             return NULL;
                                666                 :         }
                                667                 :     }
                                668                 : 
 7188 bruce                     669 CBC     1365075 :     d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
 7368 tgl                       670         1365075 :     d->nssused = 0;
                                671         1365075 :     d->nstates = cnfa->nstates;
                                672         1365075 :     d->ncolors = cnfa->ncolors;
                                673         1365075 :     d->wordsper = wordsper;
                                674         1365075 :     d->cnfa = cnfa;
                                675         1365075 :     d->cm = cm;
                                676         1365075 :     d->lastpost = NULL;
                                677         1365075 :     d->lastnopr = NULL;
                                678         1365075 :     d->search = d->ssets;
  768                           679         1365075 :     d->backno = -1;              /* may be set by caller */
                                680         1365075 :     d->backmin = d->backmax = 0;
                                681                 : 
                                682                 :     /* initialization of sset fields is done as needed */
                                683                 : 
 7368                           684         1365075 :     return d;
                                685                 : }
                                686                 : 
                                687                 : /*
                                688                 :  * freedfa - free a DFA
                                689                 :  */
                                690                 : static void
 2118                           691         1365075 : freedfa(struct dfa *d)
                                692                 : {
  777                           693         1365075 :     if (d->arraysmalloced)
                                694                 :     {
 7368                           695          103486 :         if (d->ssets != NULL)
                                696          103486 :             FREE(d->ssets);
                                697          103486 :         if (d->statesarea != NULL)
                                698          103486 :             FREE(d->statesarea);
                                699          103486 :         if (d->outsarea != NULL)
                                700          103486 :             FREE(d->outsarea);
                                701          103486 :         if (d->incarea != NULL)
                                702          103486 :             FREE(d->incarea);
                                703                 :     }
                                704                 : 
  777                           705         1365075 :     if (d->ismalloced)
                                706          115139 :         FREE(d);
 7368                           707         1365075 : }
                                708                 : 
                                709                 : /*
                                710                 :  * hash - construct a hash code for a bitvector
                                711                 :  *
                                712                 :  * There are probably better ways, but they're more expensive.
                                713                 :  */
                                714                 : static unsigned
                                715           15519 : hash(unsigned *uv,
                                716                 :      int n)
                                717                 : {
                                718                 :     int         i;
                                719                 :     unsigned    h;
                                720                 : 
                                721           15519 :     h = 0;
                                722           74517 :     for (i = 0; i < n; i++)
                                723           58998 :         h ^= uv[i];
                                724           15519 :     return h;
                                725                 : }
                                726                 : 
                                727                 : /*
                                728                 :  * initialize - hand-craft a cache entry for startup, otherwise get ready
                                729                 :  */
                                730                 : static struct sset *
 2118                           731         1366701 : initialize(struct vars *v,
                                732                 :            struct dfa *d,
                                733                 :            chr *start)
                                734                 : {
                                735                 :     struct sset *ss;
                                736                 :     int         i;
                                737                 : 
                                738                 :     /* is previous one still there? */
 7188 bruce                     739         1366701 :     if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
 7368 tgl                       740            2936 :         ss = &d->ssets[0];
                                741                 :     else
                                742                 :     {                           /* no, must (re)build it */
                                743         1363765 :         ss = getvacant(v, d, start, start);
 2746                           744         1363765 :         if (ss == NULL)
 2746 tgl                       745 UBC           0 :             return NULL;
 7368 tgl                       746 CBC     2728754 :         for (i = 0; i < d->wordsper; i++)
                                747         1364989 :             ss->states[i] = 0;
                                748         1363765 :         BSET(ss->states, d->cnfa->pre);
                                749         1363765 :         ss->hash = HASH(ss->states, d->wordsper);
                                750         1363765 :         assert(d->cnfa->pre != d->cnfa->post);
 7188 bruce                     751         1363765 :         ss->flags = STARTER | LOCKED | NOPROGRESS;
                                752                 :         /* lastseen dealt with below */
                                753                 :     }
                                754                 : 
 7368 tgl                       755         2771074 :     for (i = 0; i < d->nssused; i++)
                                756         1404373 :         d->ssets[i].lastseen = NULL;
                                757         1366701 :     ss->lastseen = start;        /* maybe untrue, but harmless */
                                758         1366701 :     d->lastpost = NULL;
                                759         1366701 :     d->lastnopr = NULL;
                                760         1366701 :     return ss;
                                761                 : }
                                762                 : 
                                763                 : /*
                                764                 :  * miss - handle a stateset cache miss
                                765                 :  *
                                766                 :  * css is the current stateset, co is the color of the current input character,
                                767                 :  * cp points to the character after that (which is where we may need to test
                                768                 :  * LACONs).  start does not affect matching behavior but is needed for pickss'
                                769                 :  * heuristics about which stateset cache entry to replace.
                                770                 :  *
                                771                 :  * Ordinarily, returns the address of the next stateset (the one that is
                                772                 :  * valid after consuming the input character).  Returns NULL if no valid
                                773                 :  * NFA states remain, ie we have a certain match failure.
                                774                 :  * Internal errors also return NULL, with v->err set.
                                775                 :  */
                                776                 : static struct sset *
 2118                           777         6223704 : miss(struct vars *v,
                                778                 :      struct dfa *d,
                                779                 :      struct sset *css,
                                780                 :      color co,
                                781                 :      chr *cp,                   /* next chr */
                                782                 :      chr *start)                /* where the attempt got started */
                                783                 : {
 7368                           784         6223704 :     struct cnfa *cnfa = d->cnfa;
                                785                 :     int         i;
                                786                 :     unsigned    h;
                                787                 :     struct carc *ca;
                                788                 :     struct sset *p;
                                789                 :     int         ispseudocolor;
                                790                 :     int         ispost;
                                791                 :     int         noprogress;
                                792                 :     int         gotstate;
                                793                 :     int         dolacons;
                                794                 :     int         sawlacons;
                                795                 : 
                                796                 :     /* for convenience, we can be called even if it might not be a miss */
 7188 bruce                     797         6223704 :     if (css->outs[co] != NULL)
                                798                 :     {
                                799                 :         FDEBUG(("hit\n"));
 7368 tgl                       800            2294 :         return css->outs[co];
                                801                 :     }
                                802                 :     FDEBUG(("miss\n"));
                                803                 : 
                                804                 :     /*
                                805                 :      * Checking for operation cancel in the inner text search loop seems
                                806                 :      * unduly expensive.  As a compromise, check during cache misses.
                                807                 :      */
    1 tmunro                    808 GNC     6221410 :     INTERRUPT(v->re);
                                809                 : 
                                810                 :     /*
 2746 tgl                       811 ECB             :      * What set of states would we end up in after consuming the co character?
                                812                 :      * We first consider PLAIN arcs that consume the character, and then look
                                813                 :      * to see what LACON arcs could be traversed after consuming it.
                                814                 :      */
 7368 tgl                       815 CBC    12487813 :     for (i = 0; i < d->wordsper; i++)
 2746                           816         6266403 :         d->work[i] = 0;          /* build new stateset bitmap in d->work */
  778                           817         6221410 :     ispseudocolor = d->cm->cd[co].flags & PSEUDO;
 7368                           818         6221410 :     ispost = 0;
                                819         6221410 :     noprogress = 1;
                                820         6221410 :     gotstate = 0;
                                821        47874095 :     for (i = 0; i < d->nstates; i++)
 7368 tgl                       822 GIC    41652685 :         if (ISBSET(css->states, i))
 3928 tgl                       823 CBC    43906243 :             for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
  778                           824        33031975 :                 if (ca->co == co ||
                                825        30210464 :                     (ca->co == RAINBOW && !ispseudocolor))
 7188 bruce                     826 ECB             :                 {
 7368 tgl                       827 CBC    11386580 :                     BSET(d->work, ca->to);
                                828        11386580 :                     gotstate = 1;
 7368 tgl                       829 GIC    11386580 :                     if (ca->to == cnfa->post)
                                830         1047233 :                         ispost = 1;
 3928 tgl                       831 CBC    11386580 :                     if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
 7368                           832         3753357 :                         noprogress = 0;
 7368 tgl                       833 ECB             :                     FDEBUG(("%d -> %d\n", i, ca->to));
                                834                 :                 }
 2746 tgl                       835 GIC     6221410 :     if (!gotstate)
 2746 tgl                       836 CBC      787037 :         return NULL;            /* character cannot reach any new state */
 2746 tgl                       837 GIC     5434373 :     dolacons = (cnfa->flags & HASLACONS);
 7368 tgl                       838 CBC     5434373 :     sawlacons = 0;
 2746 tgl                       839 ECB             :     /* outer loop handles transitive closure of reachable-by-LACON states */
 7188 bruce                     840 CBC     5435175 :     while (dolacons)
 2746 tgl                       841 ECB             :     {
 7368 tgl                       842 GIC         802 :         dolacons = 0;
 7368 tgl                       843 CBC        7346 :         for (i = 0; i < d->nstates; i++)
                                844            6544 :             if (ISBSET(d->work, i))
 3928                           845            2600 :                 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
 7188 bruce                     846 ECB             :                 {
 3928 tgl                       847 CBC        1560 :                     if (ca->co < cnfa->ncolors)
 2118                           848            1374 :                         continue;   /* not a LACON arc */
 7368 tgl                       849 GIC         186 :                     if (ISBSET(d->work, ca->to))
 2118 tgl                       850 CBC          66 :                         continue;   /* arc would be a no-op anyway */
 2118 tgl                       851 GBC         120 :                     sawlacons = 1;  /* this LACON affects our result */
 7368 tgl                       852 CBC         120 :                     if (!lacon(v, cnfa, cp, ca->co))
                                853                 :                     {
 2746                           854              63 :                         if (ISERR())
 2746 tgl                       855 UBC           0 :                             return NULL;
 2118 tgl                       856 CBC          63 :                         continue;   /* LACON arc cannot be traversed */
 2746 tgl                       857 ECB             :                     }
 2746 tgl                       858 CBC          57 :                     if (ISERR())
 2746 tgl                       859 UBC           0 :                         return NULL;
 7368 tgl                       860 CBC          57 :                     BSET(d->work, ca->to);
                                861              57 :                     dolacons = 1;
 7368 tgl                       862 GIC          57 :                     if (ca->to == cnfa->post)
 7368 tgl                       863 UIC           0 :                         ispost = 1;
 3928 tgl                       864 GIC          57 :                     if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
 7368 tgl                       865 CBC          57 :                         noprogress = 0;
                                866                 :                     FDEBUG(("%d :> %d\n", i, ca->to));
                                867                 :                 }
 7368 tgl                       868 ECB             :     }
 7368 tgl                       869 CBC     5434373 :     h = HASH(d->work, d->wordsper);
                                870                 : 
                                871                 :     /* Is this stateset already in the cache? */
 7368 tgl                       872 GIC    18931848 :     for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
 7188 bruce                     873        14673285 :         if (HIT(h, d->work, p, d->wordsper))
 7188 bruce                     874 ECB             :         {
                                875                 :             FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
                                876                 :             break;              /* NOTE BREAK OUT */
 7368 tgl                       877                 :         }
 7188 bruce                     878 GBC     5434373 :     if (i == 0)
 7188 bruce                     879 ECB             :     {                           /* nope, need a new cache entry */
 7368 tgl                       880 CBC     4258563 :         p = getvacant(v, d, cp, start);
 2746                           881         4258563 :         if (p == NULL)
 2746 tgl                       882 LBC           0 :             return NULL;
 7368 tgl                       883 CBC     4258563 :         assert(p != css);
                                884         8555004 :         for (i = 0; i < d->wordsper; i++)
                                885         4296441 :             p->states[i] = d->work[i];
 7368 tgl                       886 GIC     4258563 :         p->hash = h;
                                887         4258563 :         p->flags = (ispost) ? POSTSTATE : 0;
                                888         4258563 :         if (noprogress)
                                889         1364765 :             p->flags |= NOPROGRESS;
                                890                 :         /* lastseen to be dealt with by caller */
                                891                 :     }
                                892                 : 
                                893                 :     /*
                                894                 :      * Link new stateset to old, unless a LACON affected the result, in which
                                895                 :      * case we don't create the link.  That forces future transitions across
 2746 tgl                       896 ECB             :      * this same arc (same prior stateset and character color) to come through
                                897                 :      * miss() again, so that we can recheck the LACON(s), which might or might
                                898                 :      * not pass since context will be different.
                                899                 :      */
 7188 bruce                     900 CBC     5434373 :     if (!sawlacons)
 2746 tgl                       901 ECB             :     {
 5664                           902                 :         FDEBUG(("c%d[%d]->c%d\n",
                                903                 :                 (int) (css - d->ssets), co, (int) (p - d->ssets)));
 7368 tgl                       904 GIC     5434285 :         css->outs[co] = p;
 7368 tgl                       905 CBC     5434285 :         css->inchain[co] = p->ins;
 7368 tgl                       906 GIC     5434285 :         p->ins.ss = css;
 2424                           907         5434285 :         p->ins.co = co;
                                908                 :     }
 7368                           909         5434373 :     return p;
                                910                 : }
                                911                 : 
 7368 tgl                       912 ECB             : /*
                                913                 :  * lacon - lookaround-constraint checker for miss()
                                914                 :  */
                                915                 : static int                      /* predicate:  constraint satisfied? */
 2118 tgl                       916 GIC         120 : lacon(struct vars *v,
                                917                 :       struct cnfa *pcnfa,       /* parent cnfa */
                                918                 :       chr *cp,
                                919                 :       color co)                 /* "color" of the lookaround constraint */
                                920                 : {
                                921                 :     int         n;
                                922                 :     struct subre *sub;
                                923                 :     struct dfa *d;
 7188 bruce                     924 ECB             :     chr        *end;
                                925                 :     int         satisfied;
 7368 tgl                       926 EUB             : 
 2746                           927                 :     /* Since this is recursive, it could be driven to stack overflow */
 2746 tgl                       928 GIC         120 :     if (STACK_TOO_DEEP(v->re))
                                929                 :     {
 2746 tgl                       930 LBC           0 :         ERR(REG_ETOOBIG);
                                931               0 :         return 0;
                                932                 :     }
 2746 tgl                       933 ECB             : 
 7368 tgl                       934 CBC         120 :     n = co - pcnfa->ncolors;
 2718                           935             120 :     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
 7368 tgl                       936 EUB             :     FDEBUG(("=== testing lacon %d\n", n));
 7368 tgl                       937 CBC         120 :     sub = &v->g->lacons[n];
 2718 tgl                       938 GIC         120 :     d = getladfa(v, n);
 7188 bruce                     939             120 :     if (d == NULL)
 7368 tgl                       940 LBC           0 :         return 0;
  778 tgl                       941 GIC         120 :     if (LATYPE_IS_AHEAD(sub->latype))
 2718 tgl                       942 ECB             :     {
                                943                 :         /* used to use longest() here, but shortest() could be much cheaper */
 2718 tgl                       944 GIC          60 :         end = shortest(v, d, cp, cp, v->stop,
                                945                 :                        (chr **) NULL, (int *) NULL);
  778                           946              60 :         satisfied = LATYPE_IS_POS(sub->latype) ? (end != NULL) : (end == NULL);
                                947                 :     }
                                948                 :     else
                                949                 :     {
                                950                 :         /*
                                951                 :          * To avoid doing O(N^2) work when repeatedly testing a lookbehind
                                952                 :          * constraint in an N-character string, we use matchuntil() which can
                                953                 :          * cache the DFA state across calls.  We only need to restart if the
 2718 tgl                       954 ECB             :          * probe point decreases, which is not common.  The NFA we're using is
                                955                 :          * a search NFA, so it doesn't mind scanning over stuff before the
 2718 tgl                       956 EUB             :          * nominal match.
                                957                 :          */
 2718 tgl                       958 GIC          60 :         satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
  778 tgl                       959 CBC          60 :         if (!LATYPE_IS_POS(sub->latype))
 2718 tgl                       960 UIC           0 :             satisfied = !satisfied;
                                961                 :     }
                                962                 :     FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
 2718 tgl                       963 GIC         120 :     return satisfied;
                                964                 : }
                                965                 : 
                                966                 : /*
                                967                 :  * getvacant - get a vacant state set
                                968                 :  *
 7368 tgl                       969 ECB             :  * This routine clears out the inarcs and outarcs, but does not otherwise
                                970                 :  * clear the innards of the state set -- that's up to the caller.
                                971                 :  */
                                972                 : static struct sset *
 2118 tgl                       973 GIC     5622328 : getvacant(struct vars *v,
                                974                 :           struct dfa *d,
                                975                 :           chr *cp,
                                976                 :           chr *start)
                                977                 : {
                                978                 :     int         i;
                                979                 :     struct sset *ss;
 7368 tgl                       980 ECB             :     struct sset *p;
                                981                 :     struct arcp ap;
 7188 bruce                     982 EUB             :     color       co;
 7368 tgl                       983 ECB             : 
 7368 tgl                       984 GIC     5622328 :     ss = pickss(v, d, cp, start);
 2746                           985         5622328 :     if (ss == NULL)
 2746 tgl                       986 LBC           0 :         return NULL;
 7188 bruce                     987 CBC     5622328 :     assert(!(ss->flags & LOCKED));
                                988                 : 
 7368 tgl                       989 ECB             :     /* clear out its inarcs, including self-referential ones */
 7368 tgl                       990 GIC     5622328 :     ap = ss->ins;
 7188 bruce                     991 CBC     5622340 :     while ((p = ap.ss) != NULL)
 7188 bruce                     992 ECB             :     {
 7368 tgl                       993 CBC          12 :         co = ap.co;
                                994                 :         FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
                                995              12 :         p->outs[co] = NULL;
 7368 tgl                       996 GIC          12 :         ap = p->inchain[co];
 2118                           997              12 :         p->inchain[co].ss = NULL;    /* paranoia */
 7368 tgl                       998 ECB             :     }
 7368 tgl                       999 GIC     5622328 :     ss->ins.ss = NULL;
 7368 tgl                      1000 ECB             : 
                               1001                 :     /* take it off the inarc chains of the ssets reached by its outarcs */
 7188 bruce                    1002 CBC    48615868 :     for (i = 0; i < d->ncolors; i++)
 7188 bruce                    1003 ECB             :     {
 7368 tgl                      1004 GIC    42993540 :         p = ss->outs[i];
 7368 tgl                      1005 CBC    42993540 :         assert(p != ss);        /* not self-referential */
                               1006        42993540 :         if (p == NULL)
 7188 bruce                    1007 GIC    42993474 :             continue;           /* NOTE CONTINUE */
                               1008                 :         FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
 7368 tgl                      1009 CBC          66 :         if (p->ins.ss == ss && p->ins.co == i)
 7368 tgl                      1010 GIC          60 :             p->ins = ss->inchain[i];
 7188 bruce                    1011 ECB             :         else
                               1012                 :         {
 6406 tgl                      1013 CBC           6 :             struct arcp lastap = {NULL, 0};
 6406 tgl                      1014 ECB             : 
 7368 tgl                      1015 CBC           6 :             assert(p->ins.ss != NULL);
                               1016              12 :             for (ap = p->ins; ap.ss != NULL &&
 7188 bruce                    1017              12 :                  !(ap.ss == ss && ap.co == i);
 7188 bruce                    1018 GIC           6 :                  ap = ap.ss->inchain[ap.co])
 7368 tgl                      1019 CBC           6 :                 lastap = ap;
                               1020               6 :             assert(ap.ss != NULL);
 7368 tgl                      1021 GIC           6 :             lastap.ss->inchain[lastap.co] = ss->inchain[i];
                               1022                 :         }
                               1023              66 :         ss->outs[i] = NULL;
 7368 tgl                      1024 CBC          66 :         ss->inchain[i].ss = NULL;
 7368 tgl                      1025 ECB             :     }
                               1026                 : 
                               1027                 :     /* if ss was a success state, may need to remember location */
 7188 bruce                    1028 GIC     5622328 :     if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
 7188 bruce                    1029 CBC          18 :         (d->lastpost == NULL || d->lastpost < ss->lastseen))
 7368 tgl                      1030              18 :         d->lastpost = ss->lastseen;
 7368 tgl                      1031 ECB             : 
                               1032                 :     /* likewise for a no-progress state */
 7188 bruce                    1033 CBC     5622328 :     if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
 7188 bruce                    1034 GIC           6 :         (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
 7368 tgl                      1035               6 :         d->lastnopr = ss->lastseen;
                               1036                 : 
                               1037         5622328 :     return ss;
                               1038                 : }
                               1039                 : 
 7368 tgl                      1040 ECB             : /*
                               1041                 :  * pickss - pick the next stateset to be used
                               1042                 :  */
                               1043                 : static struct sset *
 2118 tgl                      1044 GIC     5622328 : pickss(struct vars *v,
                               1045                 :        struct dfa *d,
                               1046                 :        chr *cp,
                               1047                 :        chr *start)
                               1048                 : {
                               1049                 :     int         i;
                               1050                 :     struct sset *ss;
 7368 tgl                      1051 ECB             :     struct sset *end;
                               1052                 :     chr        *ancient;
                               1053                 : 
                               1054                 :     /* shortcut for cases where cache isn't full */
 7188 bruce                    1055 CBC     5622328 :     if (d->nssused < d->nssets)
                               1056                 :     {
 7368 tgl                      1057 GIC     5622262 :         i = d->nssused;
 7368 tgl                      1058 CBC     5622262 :         d->nssused++;
                               1059         5622262 :         ss = &d->ssets[i];
 7368 tgl                      1060 ECB             :         FDEBUG(("new c%d\n", i));
                               1061                 :         /* set up innards */
 7368 tgl                      1062 CBC     5622262 :         ss->states = &d->statesarea[i * d->wordsper];
                               1063         5622262 :         ss->flags = 0;
                               1064         5622262 :         ss->ins.ss = NULL;
 7368 tgl                      1065 GIC     5622262 :         ss->ins.co = WHITE;      /* give it some value */
 7368 tgl                      1066 CBC     5622262 :         ss->outs = &d->outsarea[i * d->ncolors];
                               1067         5622262 :         ss->inchain = &d->incarea[i * d->ncolors];
 7188 bruce                    1068 GIC    48614266 :         for (i = 0; i < d->ncolors; i++)
 7188 bruce                    1069 ECB             :         {
 7368 tgl                      1070 GIC    42992004 :             ss->outs[i] = NULL;
                               1071        42992004 :             ss->inchain[i].ss = NULL;
                               1072                 :         }
 7368 tgl                      1073 CBC     5622262 :         return ss;
 7368 tgl                      1074 ECB             :     }
                               1075                 : 
 7368 tgl                      1076 EUB             :     /* look for oldest, or old enough anyway */
 7188 bruce                    1077 CBC          66 :     if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
                               1078              66 :         ancient = cp - d->nssets * 2 / 3;
 7368 tgl                      1079 ECB             :     else
 7368 tgl                      1080 UIC           0 :         ancient = start;
 7368 tgl                      1081 CBC          72 :     for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
 7368 tgl                      1082 GIC          66 :         if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
 7188 bruce                    1083 CBC          66 :             !(ss->flags & LOCKED))
                               1084                 :         {
 7368 tgl                      1085              60 :             d->search = ss + 1;
 5664 tgl                      1086 ECB             :             FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
 7368 tgl                      1087 CBC          60 :             return ss;
                               1088                 :         }
                               1089              12 :     for (ss = d->ssets, end = d->search; ss < end; ss++)
 7368 tgl                      1090 GIC          12 :         if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
 7188 bruce                    1091 CBC          12 :             !(ss->flags & LOCKED))
                               1092                 :         {
 7368 tgl                      1093 GIC           6 :             d->search = ss + 1;
                               1094                 :             FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
                               1095               6 :             return ss;
 7368 tgl                      1096 EUB             :         }
                               1097                 : 
                               1098                 :     /* nobody's old enough?!? -- something's really wrong */
                               1099                 :     FDEBUG(("cannot find victim to replace!\n"));
 7368 tgl                      1100 UIC           0 :     ERR(REG_ASSERT);
 2746                          1101               0 :     return NULL;
                               1102                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a