LCOV - differential code coverage report
Current view: top level - src/backend/regex - regexec.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB
Current: Differential Code Coverage HEAD vs 15 Lines: 86.3 % 553 477 32 39 5 26 265 2 184 45 261
Current Date: 2023-04-08 17:13:01 Functions: 100.0 % 16 16 16 16
Baseline: 15 Line coverage date bins:
Baseline Date: 2023-04-08 15:09:40 [..60] days: 100.0 % 1 1 1
Legend: Lines: hit not hit (180,240] days: 100.0 % 1 1 1
(240..) days: 86.2 % 551 475 32 39 5 26 265 184 45 259
Function coverage date bins:
(240..) days: 50.0 % 32 16 16 16

 Age         Owner                  TLA  Line data    Source code
                                  1                 : /*
                                  2                 :  * re_*exec and friends - match REs
                                  3                 :  *
                                  4                 :  * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
                                  5                 :  *
                                  6                 :  * Development of this software was funded, in part, by Cray Research Inc.,
                                  7                 :  * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
                                  8                 :  * Corporation, none of whom are responsible for the results.  The author
                                  9                 :  * thanks all of them.
                                 10                 :  *
                                 11                 :  * Redistribution and use in source and binary forms -- with or without
                                 12                 :  * modification -- are permitted for any purpose, provided that
                                 13                 :  * redistributions in source form retain this entire copyright notice and
                                 14                 :  * indicate the origin and nature of any modifications.
                                 15                 :  *
                                 16                 :  * I'd appreciate being given credit for this package in the documentation
                                 17                 :  * of software which uses it, but that is not a requirement.
                                 18                 :  *
                                 19                 :  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
                                 20                 :  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
                                 21                 :  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
                                 22                 :  * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
                                 23                 :  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
                                 24                 :  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
                                 25                 :  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
                                 26                 :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
                                 27                 :  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
                                 28                 :  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
                                 29                 :  *
                                 30                 :  * src/backend/regex/regexec.c
                                 31                 :  *
                                 32                 :  */
                                 33                 : 
                                 34                 : #include "regex/regguts.h"
                                 35                 : 
                                 36                 : 
                                 37                 : 
                                 38                 : /* lazy-DFA representation */
                                 39                 : struct arcp
                                 40                 : {                               /* "pointer" to an outarc */
                                 41                 :     struct sset *ss;
                                 42                 :     color       co;
                                 43                 : };
                                 44                 : 
                                 45                 : struct sset
                                 46                 : {                               /* state set */
                                 47                 :     unsigned   *states;         /* pointer to bitvector */
                                 48                 :     unsigned    hash;           /* hash of bitvector */
                                 49                 : #define  HASH(bv, nw)    (((nw) == 1) ? *(bv) : hash(bv, nw))
                                 50                 : #define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
                                 51                 :         memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
                                 52                 :     int         flags;
                                 53                 : #define  STARTER     01         /* the initial state set */
                                 54                 : #define  POSTSTATE   02         /* includes the goal state */
                                 55                 : #define  LOCKED      04         /* locked in cache */
                                 56                 : #define  NOPROGRESS  010        /* zero-progress state set */
                                 57                 :     struct arcp ins;            /* chain of inarcs pointing here */
                                 58                 :     chr        *lastseen;       /* last entered on arrival here */
                                 59                 :     struct sset **outs;         /* outarc vector indexed by color */
                                 60                 :     struct arcp *inchain;       /* chain-pointer vector for outarcs */
                                 61                 : };
                                 62                 : 
                                 63                 : struct dfa
                                 64                 : {
                                 65                 :     int         nssets;         /* size of cache */
                                 66                 :     int         nssused;        /* how many entries occupied yet */
                                 67                 :     int         nstates;        /* number of states */
                                 68                 :     int         ncolors;        /* length of outarc and inchain vectors */
                                 69                 :     int         wordsper;       /* length of state-set bitvectors */
                                 70                 :     struct sset *ssets;         /* state-set cache */
                                 71                 :     unsigned   *statesarea;     /* bitvector storage */
                                 72                 :     unsigned   *work;           /* pointer to work area within statesarea */
                                 73                 :     struct sset **outsarea;     /* outarc-vector storage */
                                 74                 :     struct arcp *incarea;       /* inchain storage */
                                 75                 :     struct cnfa *cnfa;
                                 76                 :     struct colormap *cm;
                                 77                 :     chr        *lastpost;       /* location of last cache-flushed success */
                                 78                 :     chr        *lastnopr;       /* location of last cache-flushed NOPROGRESS */
                                 79                 :     struct sset *search;        /* replacement-search-pointer memory */
                                 80                 :     int         backno;         /* if DFA for a backref, subno it refers to */
                                 81                 :     short       backmin;        /* min repetitions for backref */
                                 82                 :     short       backmax;        /* max repetitions for backref */
                                 83                 :     bool        ismalloced;     /* should this struct dfa be freed? */
                                 84                 :     bool        arraysmalloced; /* should its subsidiary arrays be freed? */
                                 85                 : };
                                 86                 : 
                                 87                 : #define WORK    1               /* number of work bitvectors needed */
                                 88                 : 
                                 89                 : /* setup for non-malloc allocation for small cases */
                                 90                 : #define FEWSTATES   20          /* must be less than UBITS */
                                 91                 : #define FEWCOLORS   15
                                 92                 : struct smalldfa
                                 93                 : {
                                 94                 :     struct dfa  dfa;            /* must be first */
                                 95                 :     struct sset ssets[FEWSTATES * 2];
                                 96                 :     unsigned    statesarea[FEWSTATES * 2 + WORK];
                                 97                 :     struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
                                 98                 :     struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
                                 99                 : };
                                100                 : 
                                101                 : #define DOMALLOC    ((struct smalldfa *)NULL)   /* force malloc */
                                102                 : 
                                103                 : 
                                104                 : 
                                105                 : /* internal variables, bundled for easy passing around */
                                106                 : struct vars
                                107                 : {
                                108                 :     regex_t    *re;
                                109                 :     struct guts *g;
                                110                 :     int         eflags;         /* copies of arguments */
                                111                 :     size_t      nmatch;
                                112                 :     regmatch_t *pmatch;
                                113                 :     rm_detail_t *details;
                                114                 :     chr        *start;          /* start of string */
                                115                 :     chr        *search_start;   /* search start of string */
                                116                 :     chr        *stop;           /* just past end of string */
                                117                 :     int         err;            /* error code if any (0 none) */
                                118                 :     struct dfa **subdfas;       /* per-tree-subre DFAs */
                                119                 :     struct dfa **ladfas;        /* per-lacon-subre DFAs */
                                120                 :     struct sset **lblastcss;    /* per-lacon-subre lookbehind restart data */
                                121                 :     chr       **lblastcp;       /* per-lacon-subre lookbehind restart data */
                                122                 :     struct smalldfa dfa1;
                                123                 :     struct smalldfa dfa2;
                                124                 : };
                                125                 : 
                                126                 : #define VISERR(vv)  ((vv)->err != 0) /* have we seen an error yet? */
                                127                 : #define ISERR() VISERR(v)
                                128                 : #define VERR(vv,e)  ((vv)->err = ((vv)->err ? (vv)->err : (e)))
                                129                 : #define ERR(e)  VERR(v, e)      /* record an error */
                                130                 : #define NOERR() {if (ISERR()) return v->err;}    /* if error seen, return it */
                                131                 : #define OFF(p)  ((p) - v->start)
                                132                 : #define LOFF(p) ((long)OFF(p))
                                133                 : 
                                134                 : 
                                135                 : 
                                136                 : /*
                                137                 :  * forward declarations
                                138                 :  */
                                139                 : /* === regexec.c === */
                                140                 : static struct dfa *getsubdfa(struct vars *v, struct subre *t);
                                141                 : static struct dfa *getladfa(struct vars *v, int n);
                                142                 : static int  find(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
                                143                 : static int  cfind(struct vars *v, struct cnfa *cnfa, struct colormap *cm);
                                144                 : static int  cfindloop(struct vars *v, struct cnfa *cnfa, struct colormap *cm,
                                145                 :                       struct dfa *d, struct dfa *s, chr **coldp);
                                146                 : static void zapallsubs(regmatch_t *p, size_t n);
                                147                 : static void zaptreesubs(struct vars *v, struct subre *t);
                                148                 : static void subset(struct vars *v, struct subre *sub, chr *begin, chr *end);
                                149                 : static int  cdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
                                150                 : static int  ccondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
                                151                 : static int  crevcondissect(struct vars *v, struct subre *t, chr *begin, chr *end);
                                152                 : static int  cbrdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
                                153                 : static int  caltdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
                                154                 : static int  citerdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
                                155                 : static int  creviterdissect(struct vars *v, struct subre *t, chr *begin, chr *end);
                                156                 : 
                                157                 : /* === rege_dfa.c === */
                                158                 : static chr *longest(struct vars *v, struct dfa *d,
                                159                 :                     chr *start, chr *stop, int *hitstopp);
                                160                 : static chr *shortest(struct vars *v, struct dfa *d, chr *start, chr *min,
                                161                 :                      chr *max, chr **coldp, int *hitstopp);
                                162                 : static int  matchuntil(struct vars *v, struct dfa *d, chr *probe,
                                163                 :                        struct sset **lastcss, chr **lastcp);
                                164                 : static chr *dfa_backref(struct vars *v, struct dfa *d, chr *start,
                                165                 :                         chr *min, chr *max, bool shortest);
                                166                 : static chr *lastcold(struct vars *v, struct dfa *d);
                                167                 : static struct dfa *newdfa(struct vars *v, struct cnfa *cnfa,
                                168                 :                           struct colormap *cm, struct smalldfa *sml);
                                169                 : static void freedfa(struct dfa *d);
                                170                 : static unsigned hash(unsigned *uv, int n);
                                171                 : static struct sset *initialize(struct vars *v, struct dfa *d, chr *start);
                                172                 : static struct sset *miss(struct vars *v, struct dfa *d, struct sset *css,
                                173                 :                          color co, chr *cp, chr *start);
                                174                 : static int  lacon(struct vars *v, struct cnfa *pcnfa, chr *cp, color co);
                                175                 : static struct sset *getvacant(struct vars *v, struct dfa *d, chr *cp,
                                176                 :                               chr *start);
                                177                 : static struct sset *pickss(struct vars *v, struct dfa *d, chr *cp,
                                178                 :                            chr *start);
                                179                 : 
                                180                 : 
                                181                 : /*
                                182                 :  * pg_regexec - match regular expression
                                183                 :  */
                                184                 : int
 7368 tgl                       185 GIC      901896 : pg_regexec(regex_t *re,
                                186                 :            const chr *string,
                                187                 :            size_t len,
                                188                 :            size_t search_start,
                                189                 :            rm_detail_t *details,
                                190                 :            size_t nmatch,
                                191                 :            regmatch_t pmatch[],
                                192                 :            int flags)
                                193                 : {
 7368 tgl                       194 ECB             :     struct vars var;
  197 andres                    195 GNC      901896 :     struct vars *v = &var;
                                196                 :     int         st;
                                197                 :     size_t      n;
                                198                 :     size_t      i;
                                199                 :     int         backref;
                                200                 : 
                                201                 : #define  LOCALMAT    20
                                202                 :     regmatch_t  mat[LOCALMAT];
                                203                 : 
 4062 tgl                       204 ECB             : #define  LOCALDFAS   40
                                205                 :     struct dfa *subdfas[LOCALDFAS];
                                206                 : 
                                207                 :     /* sanity checks */
 7368 tgl                       208 GIC      901896 :     if (re == NULL || string == NULL || re->re_magic != REMAGIC)
 7368 tgl                       209 UIC           0 :         return REG_INVARG;
 7368 tgl                       210 GIC      901896 :     if (re->re_csize != sizeof(chr))
 7368 tgl                       211 UIC           0 :         return REG_MIXED;
  575 tgl                       212 GIC      901896 :     if (search_start > len)
                                213               3 :         return REG_NOMATCH;
                                214                 : 
                                215                 :     /* Initialize locale-dependent support */
 4382                           216          901893 :     pg_set_regex_collation(re->re_collation);
 4382 tgl                       217 ECB             : 
 7368 tgl                       218 EUB             :     /* setup */
 7368 tgl                       219 CBC      901893 :     v->re = re;
 7188 bruce                     220 GBC      901893 :     v->g = (struct guts *) re->re_guts;
 7188 bruce                     221 CBC      901893 :     if ((v->g->cflags & REG_EXPECT) && details == NULL)
 7368 tgl                       222 LBC           0 :         return REG_INVARG;
 7188 bruce                     223 GIC      901893 :     if (v->g->info & REG_UIMPOSSIBLE)
 7368 tgl                       224             715 :         return REG_NOMATCH;
 7188 bruce                     225 CBC      901178 :     backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
 7368 tgl                       226 GIC      901178 :     v->eflags = flags;
  608                           227          901178 :     if (backref && nmatch <= v->g->nsub)
 7188 bruce                     228 ECB             :     {
  608 tgl                       229                 :         /* need larger work area */
  594 tgl                       230 CBC          91 :         v->nmatch = v->g->nsub + 1;
  594 tgl                       231 GBC          91 :         if (v->nmatch <= LOCALMAT)
 7368 tgl                       232 CBC          90 :             v->pmatch = mat;
 7368 tgl                       233 ECB             :         else
  594 tgl                       234 CBC           1 :             v->pmatch = (regmatch_t *) MALLOC(v->nmatch * sizeof(regmatch_t));
 7368                           235              91 :         if (v->pmatch == NULL)
 7368 tgl                       236 LBC           0 :             return REG_ESPACE;
  594 tgl                       237 GIC          91 :         zapallsubs(v->pmatch, v->nmatch);
                                238                 :     }
 7188 bruce                     239 ECB             :     else
  608 tgl                       240                 :     {
                                241                 :         /* we can store results directly in caller's array */
 7368 tgl                       242 GIC      901087 :         v->pmatch = pmatch;
  608 tgl                       243 ECB             :         /* ensure any extra entries in caller's array are filled with -1 */
  608 tgl                       244 CBC      901087 :         if (nmatch > 0)
  608 tgl                       245 GBC      556472 :             zapallsubs(pmatch, nmatch);
  608 tgl                       246 ECB             :         /* then forget about extra entries, to avoid useless work in find() */
  608 tgl                       247 GIC      901087 :         if (nmatch > v->g->nsub + 1)
                                248             677 :             nmatch = v->g->nsub + 1;
                                249          901087 :         v->nmatch = nmatch;
                                250                 :     }
 7368 tgl                       251 CBC      901178 :     v->details = details;
 7188 bruce                     252 GIC      901178 :     v->start = (chr *) string;
 6482 bruce                     253 CBC      901178 :     v->search_start = (chr *) string + search_start;
 7188                           254          901178 :     v->stop = (chr *) string + len;
 7368 tgl                       255 GIC      901178 :     v->err = 0;
 2718 tgl                       256 CBC      901178 :     v->subdfas = NULL;
                                257          901178 :     v->ladfas = NULL;
                                258          901178 :     v->lblastcss = NULL;
 2718 tgl                       259 GIC      901178 :     v->lblastcp = NULL;
 2718 tgl                       260 ECB             :     /* below this point, "goto cleanup" will behave sanely */
                                261                 : 
 4062 tgl                       262 CBC      901178 :     assert(v->g->ntree >= 0);
                                263          901178 :     n = (size_t) v->g->ntree;
                                264          901178 :     if (n <= LOCALDFAS)
                                265          901176 :         v->subdfas = subdfas;
 4062 tgl                       266 ECB             :     else
                                267                 :     {
 2718 tgl                       268 CBC           2 :         v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
 2718 tgl                       269 GIC           2 :         if (v->subdfas == NULL)
                                270                 :         {
 2718 tgl                       271 LBC           0 :             st = REG_ESPACE;
                                272               0 :             goto cleanup;
 2718 tgl                       273 ECB             :         }
 4062                           274                 :     }
 4062 tgl                       275 GIC     2719024 :     for (i = 0; i < n; i++)
                                276         1817846 :         v->subdfas[i] = NULL;
 7368 tgl                       277 ECB             : 
 2718 tgl                       278 CBC      901178 :     assert(v->g->nlacons >= 0);
 2718 tgl                       279 GIC      901178 :     n = (size_t) v->g->nlacons;
 2718 tgl                       280 GBC      901178 :     if (n > 0)
 2718 tgl                       281 EUB             :     {
 2718 tgl                       282 GIC         630 :         v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
                                283             630 :         if (v->ladfas == NULL)
 2718 tgl                       284 ECB             :         {
 2718 tgl                       285 LBC           0 :             st = REG_ESPACE;
 2718 tgl                       286 UIC           0 :             goto cleanup;
 2718 tgl                       287 ECB             :         }
 2718 tgl                       288 CBC        1922 :         for (i = 0; i < n; i++)
                                289            1292 :             v->ladfas[i] = NULL;
 2718 tgl                       290 GIC         630 :         v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
 2718 tgl                       291 CBC         630 :         v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
                                292             630 :         if (v->lblastcss == NULL || v->lblastcp == NULL)
                                293                 :         {
 2718 tgl                       294 UBC           0 :             st = REG_ESPACE;
                                295               0 :             goto cleanup;
                                296                 :         }
 2718 tgl                       297 CBC        1922 :         for (i = 0; i < n; i++)
 2718 tgl                       298 ECB             :         {
 2718 tgl                       299 CBC        1292 :             v->lblastcss[i] = NULL;
                                300            1292 :             v->lblastcp[i] = NULL;
 2718 tgl                       301 ECB             :         }
                                302                 :     }
 2718 tgl                       303 EUB             : 
 7368                           304                 :     /* do it */
 7368 tgl                       305 GIC      901178 :     assert(v->g->tree != NULL);
 7368 tgl                       306 CBC      901178 :     if (backref)
 7368 tgl                       307 GIC         145 :         st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
 7368 tgl                       308 ECB             :     else
 7368 tgl                       309 CBC      901033 :         st = find(v, &v->g->tree->cnfa, &v->g->cmap);
                                310                 : 
                                311                 :     /* on success, ensure caller's match vector is filled correctly */
  608 tgl                       312 GIC      901178 :     if (st == REG_OKAY && nmatch > 0)
                                313                 :     {
  608 tgl                       314 CBC      451389 :         if (v->pmatch != pmatch)
  608 tgl                       315 ECB             :         {
                                316                 :             /* copy portion of match vector over from (larger) work area */
  608 tgl                       317 GIC          21 :             assert(nmatch <= v->nmatch);
  608 tgl                       318 CBC          21 :             memcpy(VS(pmatch), VS(v->pmatch), nmatch * sizeof(regmatch_t));
                                319                 :         }
  608 tgl                       320 GIC      451389 :         if (v->g->cflags & REG_NOSUB)
  608 tgl                       321 ECB             :         {
                                322                 :             /* don't expose possibly-partial sub-match results to caller */
  608 tgl                       323 CBC      448309 :             zapallsubs(pmatch, nmatch);
                                324                 :         }
                                325                 :     }
 7368 tgl                       326 ECB             : 
                                327                 :     /* clean up */
 2718 tgl                       328 GIC      452869 : cleanup:
 7368 tgl                       329 CBC      901178 :     if (v->pmatch != pmatch && v->pmatch != mat)
 7368 tgl                       330 GIC           1 :         FREE(v->pmatch);
 2718                           331          901178 :     if (v->subdfas != NULL)
 2718 tgl                       332 ECB             :     {
 2718 tgl                       333 GIC      901178 :         n = (size_t) v->g->ntree;
                                334         2719024 :         for (i = 0; i < n; i++)
                                335                 :         {
                                336         1817846 :             if (v->subdfas[i] != NULL)
 2718 tgl                       337 CBC       12368 :                 freedfa(v->subdfas[i]);
 2718 tgl                       338 ECB             :         }
 2718 tgl                       339 CBC      901178 :         if (v->subdfas != subdfas)
                                340               2 :             FREE(v->subdfas);
                                341                 :     }
                                342          901178 :     if (v->ladfas != NULL)
 4062 tgl                       343 ECB             :     {
 2718 tgl                       344 GIC         630 :         n = (size_t) v->g->nlacons;
 2718 tgl                       345 CBC        1922 :         for (i = 0; i < n; i++)
 2718 tgl                       346 ECB             :         {
 2718 tgl                       347 GIC        1292 :             if (v->ladfas[i] != NULL)
 2718 tgl                       348 CBC          53 :                 freedfa(v->ladfas[i]);
 2718 tgl                       349 ECB             :         }
 2718 tgl                       350 GIC         630 :         FREE(v->ladfas);
 4062 tgl                       351 ECB             :     }
 2718 tgl                       352 GIC      901178 :     if (v->lblastcss != NULL)
 2718 tgl                       353 CBC         630 :         FREE(v->lblastcss);
                                354          901178 :     if (v->lblastcp != NULL)
 2718 tgl                       355 GIC         630 :         FREE(v->lblastcp);
 4062 tgl                       356 ECB             : 
  781                           357                 : #ifdef REG_DEBUG
                                358                 :     if (v->eflags & (REG_FTRACE | REG_MTRACE))
                                359                 :         fflush(stdout);
                                360                 : #endif
                                361                 : 
 7368 tgl                       362 CBC      901178 :     return st;
 7368 tgl                       363 ECB             : }
                                364                 : 
                                365                 : /*
                                366                 :  * getsubdfa - create or re-fetch the DFA for a tree subre node
                                367                 :  *
                                368                 :  * We only need to create the DFA once per overall regex execution.
                                369                 :  * The DFA will be freed by the cleanup step in pg_regexec().
                                370                 :  */
 4062                           371                 : static struct dfa *
 2118 tgl                       372 GIC       13054 : getsubdfa(struct vars *v,
                                373                 :           struct subre *t)
                                374                 : {
  768                           375           13054 :     struct dfa *d = v->subdfas[t->id];
                                376                 : 
                                377           13054 :     if (d == NULL)
                                378                 :     {
                                379           12368 :         d = newdfa(v, &t->cnfa, &v->g->cmap, DOMALLOC);
  762                           380           12368 :         if (d == NULL)
 4062 tgl                       381 LBC           0 :             return NULL;
                                382                 :         /* set up additional info if this is a backref node */
  768 tgl                       383 GIC       12368 :         if (t->op == 'b')
  768 tgl                       384 ECB             :         {
  768 tgl                       385 GIC         140 :             d->backno = t->backno;
  768 tgl                       386 CBC         140 :             d->backmin = t->min;
  768 tgl                       387 GIC         140 :             d->backmax = t->max;
  768 tgl                       388 ECB             :         }
  768 tgl                       389 CBC       12368 :         v->subdfas[t->id] = d;
 4062 tgl                       390 EUB             :     }
  768 tgl                       391 GIC       13054 :     return d;
 4062 tgl                       392 ECB             : }
                                393                 : 
 2718                           394                 : /*
                                395                 :  * getladfa - create or re-fetch the DFA for a LACON subre node
                                396                 :  *
                                397                 :  * Same as above, but for LACONs.
                                398                 :  */
                                399                 : static struct dfa *
 2118 tgl                       400 CBC         120 : getladfa(struct vars *v,
                                401                 :          int n)
                                402                 : {
 2718 tgl                       403 GIC         120 :     assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
                                404                 : 
                                405             120 :     if (v->ladfas[n] == NULL)
                                406                 :     {
                                407              53 :         struct subre *sub = &v->g->lacons[n];
                                408                 : 
 2718 tgl                       409 CBC          53 :         v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
                                410                 :         /* a LACON can't contain a backref, so nothing else to do */
                                411                 :     }
                                412             120 :     return v->ladfas[n];
                                413                 : }
 2718 tgl                       414 ECB             : 
                                415                 : /*
 7368                           416                 :  * find - find a match for the main NFA (no-complications case)
                                417                 :  */
                                418                 : static int
 2118 tgl                       419 GIC      901033 : find(struct vars *v,
                                420                 :      struct cnfa *cnfa,
 2118 tgl                       421 ECB             :      struct colormap *cm)
                                422                 : {
                                423                 :     struct dfa *s;
                                424                 :     struct dfa *d;
                                425                 :     chr        *begin;
 7188 bruce                     426 GIC      901033 :     chr        *end = NULL;
                                427                 :     chr        *cold;
 6385 bruce                     428 ECB             :     chr        *open;           /* open and close of range of possible starts */
                                429                 :     chr        *close;
                                430                 :     int         hitend;
 7188 bruce                     431 GIC      901033 :     int         shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
                                432                 : 
                                433                 :     /* first, a shot with the search RE */
 7368 tgl                       434          901033 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
  762 tgl                       435 CBC      901033 :     if (s == NULL)
  762 tgl                       436 UIC           0 :         return v->err;
                                437                 :     MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
 7368 tgl                       438 GIC      901033 :     cold = NULL;
 6482 bruce                     439          901033 :     close = shortest(v, s, v->search_start, v->search_start, v->stop,
 6482 bruce                     440 ECB             :                      &cold, (int *) NULL);
 7368 tgl                       441 GIC      901033 :     freedfa(s);
                                442          901033 :     NOERR();
 7188 bruce                     443 CBC      901033 :     if (v->g->cflags & REG_EXPECT)
 7188 bruce                     444 ECB             :     {
 7368 tgl                       445 GBC          27 :         assert(v->details != NULL);
 7368 tgl                       446 GIC          27 :         if (cold != NULL)
 7368 tgl                       447 CBC          27 :             v->details->rm_extend.rm_so = OFF(cold);
 7368 tgl                       448 ECB             :         else
 7368 tgl                       449 UIC           0 :             v->details->rm_extend.rm_so = OFF(v->stop);
 2118 tgl                       450 CBC          27 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
 7368 tgl                       451 ECB             :     }
 7188 bruce                     452 CBC      901033 :     if (close == NULL)          /* not found */
 7368 tgl                       453 GIC      330758 :         return REG_NOMATCH;
 7188 bruce                     454 CBC      570275 :     if (v->nmatch == 0)          /* found, don't need exact location */
 7368 tgl                       455          118944 :         return REG_OKAY;
 7368 tgl                       456 ECB             : 
                                457                 :     /* find starting point and match */
 7368 tgl                       458 GBC      451331 :     assert(cold != NULL);
 7368 tgl                       459 CBC      451331 :     open = cold;
 7368 tgl                       460 GIC      451331 :     cold = NULL;
 7368 tgl                       461 ECB             :     MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
 7368 tgl                       462 CBC      451331 :     d = newdfa(v, cnfa, cm, &v->dfa1);
  762                           463          451331 :     if (d == NULL)
  762 tgl                       464 LBC           0 :         return v->err;
 7188 bruce                     465 GIC      451360 :     for (begin = open; begin <= close; begin++)
                                466                 :     {
 7368 tgl                       467 ECB             :         MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
 7368 tgl                       468 CBC      451360 :         if (shorter)
                                469              61 :             end = shortest(v, d, begin, begin, v->stop,
                                470                 :                            (chr **) NULL, &hitend);
 7368 tgl                       471 ECB             :         else
 7368 tgl                       472 CBC      451299 :             end = longest(v, d, begin, v->stop, &hitend);
 2760 tgl                       473 GBC      451360 :         if (ISERR())
 2760 tgl                       474 ECB             :         {
 2760 tgl                       475 UIC           0 :             freedfa(d);
                                476               0 :             return v->err;
 2760 tgl                       477 ECB             :         }
 7368 tgl                       478 CBC      451360 :         if (hitend && cold == NULL)
 7368 tgl                       479 GIC        3061 :             cold = begin;
                                480          451360 :         if (end != NULL)
 7188 bruce                     481 CBC      451331 :             break;              /* NOTE BREAK OUT */
 7368 tgl                       482 ECB             :     }
 7368 tgl                       483 GIC      451331 :     assert(end != NULL);        /* search RE succeeded so loop should */
 7368 tgl                       484 GBC      451331 :     freedfa(d);
 7368 tgl                       485 EUB             : 
                                486                 :     /* and pin down details */
 7368 tgl                       487 CBC      451331 :     assert(v->nmatch > 0);
                                488          451331 :     v->pmatch[0].rm_so = OFF(begin);
                                489          451331 :     v->pmatch[0].rm_eo = OFF(end);
 7188 bruce                     490          451331 :     if (v->g->cflags & REG_EXPECT)
                                491                 :     {
 7368 tgl                       492              10 :         if (cold != NULL)
                                493               5 :             v->details->rm_extend.rm_so = OFF(cold);
                                494                 :         else
 7368 tgl                       495 GIC           5 :             v->details->rm_extend.rm_so = OFF(v->stop);
 2118 tgl                       496 CBC          10 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
 7368 tgl                       497 ECB             :     }
 7188 bruce                     498 CBC      451331 :     if (v->nmatch == 1)          /* no need for submatches */
 7368 tgl                       499          449125 :         return REG_OKAY;
                                500                 : 
 4062 tgl                       501 ECB             :     /* find submatches */
 4062 tgl                       502 CBC        2206 :     return cdissect(v, v->g->tree, begin, end);
                                503                 : }
 7368 tgl                       504 ECB             : 
                                505                 : /*
                                506                 :  * cfind - find a match for the main NFA (with complications)
                                507                 :  */
                                508                 : static int
 2118 tgl                       509 GIC         145 : cfind(struct vars *v,
                                510                 :       struct cnfa *cnfa,
 2118 tgl                       511 ECB             :       struct colormap *cm)
                                512                 : {
                                513                 :     struct dfa *s;
                                514                 :     struct dfa *d;
                                515                 :     chr        *cold;
                                516                 :     int         ret;
                                517                 : 
 7368 tgl                       518 CBC         145 :     s = newdfa(v, &v->g->search, cm, &v->dfa1);
  762 tgl                       519 GIC         145 :     if (s == NULL)
  762 tgl                       520 UIC           0 :         return v->err;
 7368 tgl                       521 GIC         145 :     d = newdfa(v, cnfa, cm, &v->dfa2);
  762                           522             145 :     if (d == NULL)
                                523                 :     {
 7368 tgl                       524 UIC           0 :         freedfa(s);
                                525               0 :         return v->err;
                                526                 :     }
 7368 tgl                       527 ECB             : 
 7368 tgl                       528 CBC         145 :     ret = cfindloop(v, cnfa, cm, d, s, &cold);
 7368 tgl                       529 EUB             : 
 7368 tgl                       530 CBC         145 :     freedfa(d);
                                531             145 :     freedfa(s);
 7368 tgl                       532 GIC         145 :     NOERR();
 7188 bruce                     533 GBC         145 :     if (v->g->cflags & REG_EXPECT)
 7188 bruce                     534 EUB             :     {
 7368 tgl                       535 UIC           0 :         assert(v->details != NULL);
                                536               0 :         if (cold != NULL)
 7368 tgl                       537 LBC           0 :             v->details->rm_extend.rm_so = OFF(cold);
                                538                 :         else
                                539               0 :             v->details->rm_extend.rm_so = OFF(v->stop);
 2118                           540               0 :         v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
 7368 tgl                       541 ECB             :     }
 7368 tgl                       542 CBC         145 :     return ret;
                                543                 : }
 7368 tgl                       544 EUB             : 
                                545                 : /*
                                546                 :  * cfindloop - the heart of cfind
                                547                 :  */
                                548                 : static int
 2118 tgl                       549 GBC         145 : cfindloop(struct vars *v,
                                550                 :           struct cnfa *cnfa,
 2118 tgl                       551 ECB             :           struct colormap *cm,
                                552                 :           struct dfa *d,
                                553                 :           struct dfa *s,
                                554                 :           chr **coldp)          /* where to put coldstart pointer */
                                555                 : {
                                556                 :     chr        *begin;
                                557                 :     chr        *end;
 7188 bruce                     558                 :     chr        *cold;
                                559                 :     chr        *open;           /* open and close of range of possible starts */
                                560                 :     chr        *close;
                                561                 :     chr        *estart;
                                562                 :     chr        *estop;
                                563                 :     int         er;
 7188 bruce                     564 GIC         145 :     int         shorter = v->g->tree->flags & SHORTER;
                                565                 :     int         hitend;
                                566                 : 
 7368 tgl                       567             145 :     assert(d != NULL && s != NULL);
                                568             145 :     cold = NULL;
 6482 bruce                     569             145 :     close = v->search_start;
                                570                 :     do
                                571                 :     {
                                572                 :         /* Search with the search RE for match range at/beyond "close" */
 7368 tgl                       573 ECB             :         MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
 7188 bruce                     574 GIC         161 :         close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
 2746 tgl                       575             161 :         if (ISERR())
 2746 tgl                       576 ECB             :         {
 2746 tgl                       577 LBC           0 :             *coldp = cold;
                                578               0 :             return v->err;
                                579                 :         }
 7368 tgl                       580 GIC         161 :         if (close == NULL)
 2746                           581              15 :             break;              /* no more possible match anywhere */
 7368                           582             146 :         assert(cold != NULL);
 7368 tgl                       583 CBC         146 :         open = cold;
                                584             146 :         cold = NULL;
                                585                 :         /* Search for matches starting between "open" and "close" inclusive */
 7368 tgl                       586 EUB             :         MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
 7188 bruce                     587 GBC         513 :         for (begin = open; begin <= close; begin++)
                                588                 :         {
 7368 tgl                       589 ECB             :             MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
 7368 tgl                       590 CBC         452 :             estart = begin;
                                591             452 :             estop = v->stop;
 7188 bruce                     592 ECB             :             for (;;)
                                593                 :             {
                                594                 :                 /* Here we use the top node's detailed RE */
 7368 tgl                       595 GIC         646 :                 if (shorter)
 7368 tgl                       596 CBC          97 :                     end = shortest(v, d, begin, estart,
                                597                 :                                    estop, (chr **) NULL, &hitend);
                                598                 :                 else
                                599             549 :                     end = longest(v, d, begin, estop,
 7188 bruce                     600 ECB             :                                   &hitend);
 2746 tgl                       601 GIC         646 :                 if (ISERR())
                                602                 :                 {
 2746 tgl                       603 UIC           0 :                     *coldp = cold;
 2746 tgl                       604 LBC           0 :                     return v->err;
 2746 tgl                       605 ECB             :                 }
 7368 tgl                       606 GIC         646 :                 if (hitend && cold == NULL)
                                607             104 :                     cold = begin;
 7368 tgl                       608 CBC         646 :                 if (end == NULL)
 2746 tgl                       609 GIC         349 :                     break;      /* no match with this begin point, try next */
 7368 tgl                       610 ECB             :                 MDEBUG(("tentative end %ld\n", LOFF(end)));
                                611                 :                 /* Dissect the potential match to see if it really matches */
 7368 tgl                       612 GBC         297 :                 er = cdissect(v, v->g->tree, begin, end);
 7188 bruce                     613             297 :                 if (er == REG_OKAY)
                                614                 :                 {
 7188 bruce                     615 CBC          85 :                     if (v->nmatch > 0)
 7188 bruce                     616 ECB             :                     {
 7368 tgl                       617 CBC          85 :                         v->pmatch[0].rm_so = OFF(begin);
                                618              85 :                         v->pmatch[0].rm_eo = OFF(end);
                                619                 :                     }
 7368 tgl                       620 GIC          85 :                     *coldp = cold;
 7368 tgl                       621 CBC          85 :                     return REG_OKAY;
 7368 tgl                       622 ECB             :                 }
 7188 bruce                     623 GIC         212 :                 if (er != REG_NOMATCH)
 7188 bruce                     624 ECB             :                 {
 7368 tgl                       625 UIC           0 :                     ERR(er);
 6406 tgl                       626 LBC           0 :                     *coldp = cold;
 7368                           627               0 :                     return er;
                                628                 :                 }
 2746 tgl                       629 ECB             :                 /* Try next longer/shorter match with same begin point */
 7368 tgl                       630 CBC         212 :                 if (shorter)
                                631                 :                 {
 3552                           632              81 :                     if (end == estop)
 2746 tgl                       633 GIC          12 :                         break;  /* no more, so try next begin point */
 7368 tgl                       634 GBC          69 :                     estart = end + 1;
 3552 tgl                       635 EUB             :                 }
 7368                           636                 :                 else
                                637                 :                 {
 3552 tgl                       638 GIC         131 :                     if (end == begin)
 2746 tgl                       639 CBC           6 :                         break;  /* no more, so try next begin point */
 7368 tgl                       640 GIC         125 :                     estop = end - 1;
 3552 tgl                       641 ECB             :                 }
                                642                 :             }                   /* end loop over endpoint positions */
                                643                 :         }                       /* end loop over beginning positions */
                                644                 : 
                                645                 :         /*
                                646                 :          * If we get here, there is no possible match starting at or before
 2746                           647                 :          * "close", so consider matches beyond that.  We'll do a fresh search
                                648                 :          * with the search RE to find a new promising match range.
                                649                 :          */
 2746 tgl                       650 GIC          61 :         close++;
 7368                           651              61 :     } while (close < v->stop);
                                652                 : 
                                653              60 :     *coldp = cold;
                                654              60 :     return REG_NOMATCH;
                                655                 : }
                                656                 : 
                                657                 : /*
                                658                 :  * zapallsubs - initialize all subexpression matches to "no match"
  594 tgl                       659 ECB             :  *
                                660                 :  * Note that p[0], the overall-match location, is not touched.
                                661                 :  */
 7368                           662                 : static void
 4062 tgl                       663 CBC     1004872 : zapallsubs(regmatch_t *p,
                                664                 :            size_t n)
                                665                 : {
                                666                 :     size_t      i;
                                667                 : 
 7188 bruce                     668 GIC     1012596 :     for (i = n - 1; i > 0; i--)
                                669                 :     {
 7368 tgl                       670            7724 :         p[i].rm_so = -1;
                                671            7724 :         p[i].rm_eo = -1;
 7368 tgl                       672 ECB             :     }
 7368 tgl                       673 GIC     1004872 : }
                                674                 : 
                                675                 : /*
                                676                 :  * zaptreesubs - initialize subexpressions within subtree to "no match"
 7368 tgl                       677 ECB             :  */
                                678                 : static void
 2118 tgl                       679 CBC         520 : zaptreesubs(struct vars *v,
 2118 tgl                       680 ECB             :             struct subre *t)
                                681                 : {
  778 tgl                       682 CBC         520 :     int         n = t->capno;
                                683                 :     struct subre *t2;
                                684                 : 
  778 tgl                       685 GIC         520 :     if (n > 0)
                                686                 :     {
 3972                           687             243 :         if ((size_t) n < v->nmatch)
 3972 tgl                       688 ECB             :         {
 3972 tgl                       689 GIC         243 :             v->pmatch[n].rm_so = -1;
                                690             243 :             v->pmatch[n].rm_eo = -1;
 3972 tgl                       691 ECB             :         }
                                692                 :     }
                                693                 : 
  778 tgl                       694 CBC         695 :     for (t2 = t->child; t2 != NULL; t2 = t2->sibling)
  778 tgl                       695 GIC         175 :         zaptreesubs(v, t2);
 7368 tgl                       696 CBC         520 : }
                                697                 : 
 7368 tgl                       698 ECB             : /*
 3972                           699                 :  * subset - set subexpression match data for a successful subre
                                700                 :  */
                                701                 : static void
 2118 tgl                       702 GIC        4059 : subset(struct vars *v,
 2118 tgl                       703 ECB             :        struct subre *sub,
 7184 bruce                     704                 :        chr *begin,
                                705                 :        chr *end)
                                706                 : {
  778 tgl                       707 GIC        4059 :     int         n = sub->capno;
                                708                 : 
 7368                           709            4059 :     assert(n > 0);
 7188 bruce                     710            4059 :     if ((size_t) n >= v->nmatch)
 7368 tgl                       711 CBC           9 :         return;
                                712                 : 
                                713                 :     MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end)));
 7368 tgl                       714 GIC        4050 :     v->pmatch[n].rm_so = OFF(begin);
                                715            4050 :     v->pmatch[n].rm_eo = OFF(end);
 7368 tgl                       716 ECB             : }
                                717                 : 
                                718                 : /*
 4062                           719                 :  * cdissect - check backrefs and determine subexpression matches
                                720                 :  *
                                721                 :  * cdissect recursively processes a subre tree to check matching of backrefs
                                722                 :  * and/or identify submatch boundaries for capture nodes.  The proposed match
                                723                 :  * runs from "begin" to "end" (not including "end"), and we are basically
                                724                 :  * "dissecting" it to see where the submatches are.
                                725                 :  *
                                726                 :  * Before calling any level of cdissect, the caller must have run the node's
                                727                 :  * DFA and found that the proposed substring satisfies the DFA.  (We make
                                728                 :  * the caller do that because in concatenation and iteration nodes, it's
                                729                 :  * much faster to check all the substrings against the child DFAs before we
                                730                 :  * recurse.)
                                731                 :  *
                                732                 :  * A side-effect of a successful match is to save match locations for
                                733                 :  * capturing subexpressions in v->pmatch[].  This is a little bit tricky,
                                734                 :  * so we make the following rules:
                                735                 :  * 1. Before initial entry to cdissect, all match data must have been
                                736                 :  *    cleared (this is seen to by zapallsubs).
                                737                 :  * 2. Before any recursive entry to cdissect, the match data for that
                                738                 :  *    subexpression tree must be guaranteed clear (see zaptreesubs).
                                739                 :  * 3. When returning REG_OKAY, each level of cdissect will have saved
                                740                 :  *    any relevant match locations.
                                741                 :  * 4. When returning REG_NOMATCH, each level of cdissect will guarantee
                                742                 :  *    that its subexpression match locations are again clear.
                                743                 :  * 5. No guarantees are made for error cases (i.e., other result codes).
                                744                 :  * 6. When a level of cdissect abandons a successful sub-match, it will
                                745                 :  *    clear that subtree's match locations with zaptreesubs before trying
                                746                 :  *    any new DFA match or cdissect call for that subtree or any subtree
                                747                 :  *    to its right (that is, any subtree that could have a backref into the
                                748                 :  *    abandoned match).
                                749                 :  * This may seem overly complicated, but it's difficult to simplify it
                                750                 :  * because of the provision that match locations must be reset before
                                751                 :  * any fresh DFA match (a rule that is needed to make dfa_backref safe).
                                752                 :  * That means it won't work to just reset relevant match locations at the
                                753                 :  * start of each cdissect level.
                                754                 :  */
                                755                 : static int                      /* regexec return code */
 2118 tgl                       756 GIC       15248 : cdissect(struct vars *v,
                                757                 :          struct subre *t,
                                758                 :          chr *begin,            /* beginning of relevant substring */
                                759                 :          chr *end)              /* end of same */
                                760                 : {
                                761                 :     int         er;
                                762                 : 
 7368                           763           15248 :     assert(t != NULL);
                                764                 :     MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
 7368 tgl                       765 ECB             : 
                                766                 :     /* handy place to check for operation cancel */
    1 tmunro                    767 GNC       15248 :     INTERRUPT(v->re);
                                768                 :     /* ... and stack overrun */
 2746 tgl                       769 GIC       15248 :     if (STACK_TOO_DEEP(v->re))
 2746 tgl                       770 UIC           0 :         return REG_ETOOBIG;
 3326 tgl                       771 ECB             : 
 7188 bruce                     772 GIC       15248 :     switch (t->op)
                                773                 :     {
                                774            8407 :         case '=':               /* terminal node */
  778 tgl                       775 CBC        8407 :             assert(t->child == NULL);
 4062 tgl                       776 GIC        8407 :             er = REG_OKAY;      /* no action, parent did the work */
 4062 tgl                       777 CBC        8407 :             break;
 4066 tgl                       778 GBC         209 :         case 'b':               /* back reference */
  778 tgl                       779 GIC         209 :             assert(t->child == NULL);
 4062 tgl                       780 CBC         209 :             er = cbrdissect(v, t, begin, end);
 4062 tgl                       781 GIC         209 :             break;
 7188 bruce                     782 CBC        6422 :         case '.':               /* concatenation */
  778 tgl                       783            6422 :             assert(t->child != NULL);
                                784            6422 :             if (t->child->flags & SHORTER)    /* reverse scan */
 4062                           785             164 :                 er = crevcondissect(v, t, begin, end);
 4062 tgl                       786 ECB             :             else
 4062 tgl                       787 CBC        6258 :                 er = ccondissect(v, t, begin, end);
                                788            6422 :             break;
                                789              68 :         case '|':               /* alternation */
  778                           790              68 :             assert(t->child != NULL);
 4062                           791              68 :             er = caltdissect(v, t, begin, end);
                                792              68 :             break;
                                793             112 :         case '*':               /* iteration */
  778 tgl                       794 GIC         112 :             assert(t->child != NULL);
  778 tgl                       795 CBC         112 :             if (t->child->flags & SHORTER)    /* reverse scan */
 4062                           796               7 :                 er = creviterdissect(v, t, begin, end);
 4062 tgl                       797 ECB             :             else
 4062 tgl                       798 CBC         105 :                 er = citerdissect(v, t, begin, end);
                                799             112 :             break;
  778                           800              30 :         case '(':               /* no-op capture node */
                                801              30 :             assert(t->child != NULL);
                                802              30 :             er = cdissect(v, t->child, begin, end);
 4062                           803              30 :             break;
 7188 bruce                     804 LBC           0 :         default:
 4062 tgl                       805 UIC           0 :             er = REG_ASSERT;
 4062 tgl                       806 LBC           0 :             break;
 7368 tgl                       807 ECB             :     }
 4062                           808                 : 
                                809                 :     /*
                                810                 :      * We should never have a match failure unless backrefs lurk below;
                                811                 :      * otherwise, either caller failed to check the DFA, or there's some
 4062 tgl                       812 EUB             :      * inconsistency between the DFA and the node's innards.
                                813                 :      */
 4062 tgl                       814 GBC       15248 :     assert(er != REG_NOMATCH || (t->flags & BACKR));
                                815                 : 
                                816                 :     /*
                                817                 :      * If this node is marked as capturing, save successful match's location.
                                818                 :      */
  778 tgl                       819 GIC       15248 :     if (t->capno > 0 && er == REG_OKAY)
                                820            4059 :         subset(v, t, begin, end);
                                821                 : 
 4062 tgl                       822 CBC       15248 :     return er;
                                823                 : }
                                824                 : 
                                825                 : /*
                                826                 :  * ccondissect - dissect match for concatenation node
 7368 tgl                       827 ECB             :  */
 7188 bruce                     828                 : static int                      /* regexec return code */
 2118 tgl                       829 GIC        6258 : ccondissect(struct vars *v,
 2118 tgl                       830 ECB             :             struct subre *t,
                                831                 :             chr *begin,         /* beginning of relevant substring */
                                832                 :             chr *end)           /* end of same */
                                833                 : {
  778 tgl                       834 GIC        6258 :     struct subre *left = t->child;
                                835            6258 :     struct subre *right = left->sibling;
                                836                 :     struct dfa *d;
 7368 tgl                       837 ECB             :     struct dfa *d2;
                                838                 :     chr        *mid;
                                839                 :     int         er;
                                840                 : 
 7368 tgl                       841 GIC        6258 :     assert(t->op == '.');
  778 tgl                       842 CBC        6258 :     assert(left != NULL && left->cnfa.nstates > 0);
                                843            6258 :     assert(right != NULL && right->cnfa.nstates > 0);
  778 tgl                       844 GIC        6258 :     assert(right->sibling == NULL);
                                845            6258 :     assert(!(left->flags & SHORTER));
                                846                 : 
                                847            6258 :     d = getsubdfa(v, left);
 4062                           848            6258 :     NOERR();
  778 tgl                       849 CBC        6258 :     d2 = getsubdfa(v, right);
 4062                           850            6258 :     NOERR();
  781 tgl                       851 ECB             :     MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
 7368                           852                 : 
                                853                 :     /* pick a tentative midpoint */
 4062 tgl                       854 GIC        6258 :     mid = longest(v, d, begin, end, (int *) NULL);
 2746 tgl                       855 CBC        6258 :     NOERR();
 4062                           856            6258 :     if (mid == NULL)
                                857               8 :         return REG_NOMATCH;
  781 tgl                       858 ECB             :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
                                859                 : 
                                860                 :     /* iterate until satisfaction or failure */
                                861                 :     for (;;)
 7188 bruce                     862                 :     {
 7368 tgl                       863                 :         /* try this midpoint on for size */
 4815 tgl                       864 CBC        7772 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
 7188 bruce                     865 ECB             :         {
  778 tgl                       866 GIC        6209 :             er = cdissect(v, left, begin, mid);
 4815                           867            6209 :             if (er == REG_OKAY)
                                868                 :             {
  778                           869            6159 :                 er = cdissect(v, right, mid, end);
 4815                           870            6159 :                 if (er == REG_OKAY)
                                871                 :                 {
 4815 tgl                       872 ECB             :                     /* satisfaction */
                                873                 :                     MDEBUG(("%d: successful\n", t->id));
 4815 tgl                       874 CBC        5911 :                     return REG_OKAY;
 4815 tgl                       875 ECB             :                 }
                                876                 :                 /* Reset left's matches (right should have done so itself) */
  594 tgl                       877 CBC         248 :                 zaptreesubs(v, left);
 4815 tgl                       878 ECB             :             }
 4062 tgl                       879 GIC         298 :             if (er != REG_NOMATCH)
 4815 tgl                       880 UIC           0 :                 return er;
                                881                 :         }
 2746 tgl                       882 CBC        1861 :         NOERR();
                                883                 : 
                                884                 :         /* that midpoint didn't work, find a new one */
 7188 bruce                     885            1861 :         if (mid == begin)
                                886                 :         {
 7368 tgl                       887 ECB             :             /* all possibilities exhausted */
  781 tgl                       888 EUB             :             MDEBUG(("%d: no midpoint\n", t->id));
 7368 tgl                       889 GIC          75 :             return REG_NOMATCH;
 7368 tgl                       890 ECB             :         }
 7188 bruce                     891 GIC        1786 :         mid = longest(v, d, begin, mid - 1, (int *) NULL);
 2746 tgl                       892            1786 :         NOERR();
 7188 bruce                     893 CBC        1786 :         if (mid == NULL)
                                894                 :         {
                                895                 :             /* failed to find a new one */
                                896                 :             MDEBUG(("%d: failed midpoint\n", t->id));
 7368 tgl                       897             264 :             return REG_NOMATCH;
                                898                 :         }
 4062 tgl                       899 ECB             :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
 7368                           900                 :     }
                                901                 : 
                                902                 :     /* can't get here */
                                903                 :     return REG_ASSERT;
                                904                 : }
                                905                 : 
                                906                 : /*
                                907                 :  * crevcondissect - dissect match for concatenation node, shortest-first
                                908                 :  */
                                909                 : static int                      /* regexec return code */
 2118 tgl                       910 GIC         164 : crevcondissect(struct vars *v,
                                911                 :                struct subre *t,
                                912                 :                chr *begin,      /* beginning of relevant substring */
                                913                 :                chr *end)        /* end of same */
                                914                 : {
  778                           915             164 :     struct subre *left = t->child;
                                916             164 :     struct subre *right = left->sibling;
                                917                 :     struct dfa *d;
 7368 tgl                       918 ECB             :     struct dfa *d2;
                                919                 :     chr        *mid;
                                920                 :     int         er;
                                921                 : 
 7368 tgl                       922 GIC         164 :     assert(t->op == '.');
  778 tgl                       923 CBC         164 :     assert(left != NULL && left->cnfa.nstates > 0);
                                924             164 :     assert(right != NULL && right->cnfa.nstates > 0);
  778 tgl                       925 GIC         164 :     assert(right->sibling == NULL);
                                926             164 :     assert(left->flags & SHORTER);
                                927                 : 
                                928             164 :     d = getsubdfa(v, left);
 4062                           929             164 :     NOERR();
  778 tgl                       930 CBC         164 :     d2 = getsubdfa(v, right);
 4062                           931             164 :     NOERR();
  781 tgl                       932 ECB             :     MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
 7368                           933                 : 
                                934                 :     /* pick a tentative midpoint */
 4062 tgl                       935 GIC         164 :     mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
 2746 tgl                       936 CBC         164 :     NOERR();
 4062                           937             164 :     if (mid == NULL)
 4062 tgl                       938 LBC           0 :         return REG_NOMATCH;
  781 tgl                       939 ECB             :     MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid)));
                                940                 : 
                                941                 :     /* iterate until satisfaction or failure */
                                942                 :     for (;;)
 7188 bruce                     943                 :     {
 7368 tgl                       944                 :         /* try this midpoint on for size */
 4815 tgl                       945 CBC         605 :         if (longest(v, d2, mid, end, (int *) NULL) == end)
 7188 bruce                     946 EUB             :         {
  778 tgl                       947 GIC          93 :             er = cdissect(v, left, begin, mid);
 4815                           948              93 :             if (er == REG_OKAY)
                                949                 :             {
  778                           950              93 :                 er = cdissect(v, right, mid, end);
 4815                           951              93 :                 if (er == REG_OKAY)
                                952                 :                 {
 4815 tgl                       953 ECB             :                     /* satisfaction */
                                954                 :                     MDEBUG(("%d: successful\n", t->id));
 4815 tgl                       955 CBC          81 :                     return REG_OKAY;
 4815 tgl                       956 ECB             :                 }
                                957                 :                 /* Reset left's matches (right should have done so itself) */
  594 tgl                       958 CBC          12 :                 zaptreesubs(v, left);
 4815 tgl                       959 ECB             :             }
 4062 tgl                       960 GIC          12 :             if (er != REG_NOMATCH)
 4815 tgl                       961 UIC           0 :                 return er;
                                962                 :         }
 2746 tgl                       963 CBC         524 :         NOERR();
                                964                 : 
                                965                 :         /* that midpoint didn't work, find a new one */
 7188 bruce                     966             524 :         if (mid == end)
                                967                 :         {
 7368 tgl                       968 ECB             :             /* all possibilities exhausted */
  781 tgl                       969 EUB             :             MDEBUG(("%d: no midpoint\n", t->id));
 7368 tgl                       970 GIC          83 :             return REG_NOMATCH;
 7368 tgl                       971 ECB             :         }
 7188 bruce                     972 GIC         441 :         mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
 2746 tgl                       973             441 :         NOERR();
 7188 bruce                     974 CBC         441 :         if (mid == NULL)
                                975                 :         {
                                976                 :             /* failed to find a new one */
                                977                 :             MDEBUG(("%d: failed midpoint\n", t->id));
 7368 tgl                       978 LBC           0 :             return REG_NOMATCH;
                                979                 :         }
 4062 tgl                       980 ECB             :         MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
 7368                           981                 :     }
                                982                 : 
                                983                 :     /* can't get here */
                                984                 :     return REG_ASSERT;
                                985                 : }
 7368 tgl                       986 EUB             : 
                                987                 : /*
                                988                 :  * cbrdissect - dissect match for backref node
                                989                 :  *
                                990                 :  * The backref match might already have been verified by dfa_backref(),
                                991                 :  * but we don't know that for sure so must check it here.
                                992                 :  */
                                993                 : static int                      /* regexec return code */
 2118 tgl                       994 GIC         209 : cbrdissect(struct vars *v,
                                995                 :            struct subre *t,
                                996                 :            chr *begin,          /* beginning of relevant substring */
                                997                 :            chr *end)            /* end of same */
                                998                 : {
  778                           999             209 :     int         n = t->backno;
                               1000                 :     size_t      numreps;
                               1001                 :     size_t      tlen;
 4066 tgl                      1002 ECB             :     size_t      brlen;
                               1003                 :     chr        *brstring;
                               1004                 :     chr        *p;
 7188 bruce                    1005 GIC         209 :     int         min = t->min;
                               1006             209 :     int         max = t->max;
 7368 tgl                      1007 ECB             : 
 7368 tgl                      1008 GIC         209 :     assert(t != NULL);
                               1009             209 :     assert(t->op == 'b');
                               1010             209 :     assert(n >= 0);
 7188 bruce                    1011             209 :     assert((size_t) n < v->nmatch);
                               1012                 : 
  781 tgl                      1013 ECB             :     MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max,
                               1014                 :             LOFF(begin), LOFF(end)));
                               1015                 : 
 4066                          1016                 :     /* get the backreferenced string */
 7368 tgl                      1017 CBC         209 :     if (v->pmatch[n].rm_so == -1)
                               1018              55 :         return REG_NOMATCH;
 4066                          1019             154 :     brstring = v->start + v->pmatch[n].rm_so;
 4066 tgl                      1020 GIC         154 :     brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
                               1021                 : 
                               1022                 :     /* special cases for zero-length strings */
                               1023             154 :     if (brlen == 0)
                               1024                 :     {
 4066 tgl                      1025 ECB             :         /*
                               1026                 :          * matches only if target is zero length, but any number of
                               1027                 :          * repetitions can be considered to be present
                               1028                 :          */
 4066 tgl                      1029 GIC           7 :         if (begin == end && min <= max)
                               1030                 :         {
  781 tgl                      1031 ECB             :             MDEBUG(("%d: backref matched trivially\n", t->id));
 4066 tgl                      1032 GIC           7 :             return REG_OKAY;
                               1033                 :         }
 4066 tgl                      1034 UIC           0 :         return REG_NOMATCH;
                               1035                 :     }
 4066 tgl                      1036 GIC         147 :     if (begin == end)
 7188 bruce                    1037 ECB             :     {
                               1038                 :         /* matches only if zero repetitions are okay */
 4066 tgl                      1039 GIC           6 :         if (min == 0)
 4066 tgl                      1040 ECB             :         {
                               1041                 :             MDEBUG(("%d: backref matched trivially\n", t->id));
 7368 tgl                      1042 GBC           5 :             return REG_OKAY;
                               1043                 :         }
 7368 tgl                      1044 CBC           1 :         return REG_NOMATCH;
                               1045                 :     }
                               1046                 : 
 4066 tgl                      1047 ECB             :     /*
                               1048                 :      * check target length to see if it could possibly be an allowed number of
                               1049                 :      * repetitions of brstring
                               1050                 :      */
 4066 tgl                      1051 GIC         141 :     assert(end > begin);
 4066 tgl                      1052 CBC         141 :     tlen = end - begin;
 4066 tgl                      1053 GIC         141 :     if (tlen % brlen != 0)
                               1054               5 :         return REG_NOMATCH;
                               1055             136 :     numreps = tlen / brlen;
 2762                          1056             136 :     if (numreps < min || (numreps > max && max != DUPINF))
 7368                          1057               3 :         return REG_NOMATCH;
                               1058                 : 
 4066 tgl                      1059 ECB             :     /* okay, compare the actual string contents */
 4066 tgl                      1060 CBC         133 :     p = begin;
                               1061             236 :     while (numreps-- > 0)
 7188 bruce                    1062 ECB             :     {
 4066 tgl                      1063 CBC         151 :         if ((*v->g->compare) (brstring, p, brlen) != 0)
                               1064              48 :             return REG_NOMATCH;
                               1065             103 :         p += brlen;
                               1066                 :     }
                               1067                 : 
  781 tgl                      1068 ECB             :     MDEBUG(("%d: backref matched\n", t->id));
 4066 tgl                      1069 CBC          85 :     return REG_OKAY;
                               1070                 : }
 7368 tgl                      1071 ECB             : 
                               1072                 : /*
 4062                          1073                 :  * caltdissect - dissect match for alternation node
                               1074                 :  */
                               1075                 : static int                      /* regexec return code */
 2118 tgl                      1076 GIC          68 : caltdissect(struct vars *v,
 2118 tgl                      1077 ECB             :             struct subre *t,
                               1078                 :             chr *begin,         /* beginning of relevant substring */
                               1079                 :             chr *end)           /* end of same */
                               1080                 : {
                               1081                 :     struct dfa *d;
                               1082                 :     int         er;
                               1083                 : 
  778 tgl                      1084 CBC          68 :     assert(t->op == '|');
                               1085                 : 
  778 tgl                      1086 GIC          68 :     t = t->child;
                               1087                 :     /* there should be at least 2 alternatives */
                               1088              68 :     assert(t != NULL && t->sibling != NULL);
                               1089                 : 
 4062                          1090             125 :     while (t != NULL)
                               1091                 :     {
  778 tgl                      1092 CBC          99 :         assert(t->cnfa.nstates > 0);
                               1093                 : 
  781 tgl                      1094 ECB             :         MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
                               1095                 : 
  778 tgl                      1096 CBC          99 :         d = getsubdfa(v, t);
 4062 tgl                      1097 GIC          99 :         NOERR();
 4062 tgl                      1098 CBC          99 :         if (longest(v, d, begin, end, (int *) NULL) == end)
                               1099                 :         {
  781 tgl                      1100 ECB             :             MDEBUG(("%d: caltdissect matched\n", t->id));
  778 tgl                      1101 GIC          76 :             er = cdissect(v, t, begin, end);
 4062                          1102              76 :             if (er != REG_NOMATCH)
                               1103              42 :                 return er;
 4062 tgl                      1104 ECB             :         }
 2746 tgl                      1105 CBC          57 :         NOERR();
 7368 tgl                      1106 ECB             : 
  778 tgl                      1107 GIC          57 :         t = t->sibling;
                               1108                 :     }
 7368 tgl                      1109 ECB             : 
 4062 tgl                      1110 CBC          26 :     return REG_NOMATCH;
 7368 tgl                      1111 ECB             : }
                               1112                 : 
 4062                          1113                 : /*
                               1114                 :  * citerdissect - dissect match for iteration node
                               1115                 :  */
                               1116                 : static int                      /* regexec return code */
 2118 tgl                      1117 GIC         105 : citerdissect(struct vars *v,
 2118 tgl                      1118 ECB             :              struct subre *t,
                               1119                 :              chr *begin,        /* beginning of relevant substring */
                               1120                 :              chr *end)          /* end of same */
                               1121                 : {
                               1122                 :     struct dfa *d;
                               1123                 :     chr       **endpts;
                               1124                 :     chr        *limit;
 4062                          1125                 :     int         min_matches;
                               1126                 :     size_t      max_matches;
                               1127                 :     int         nverified;
                               1128                 :     int         k;
                               1129                 :     int         i;
                               1130                 :     int         er;
                               1131                 : 
 4062 tgl                      1132 GIC         105 :     assert(t->op == '*');
  778                          1133             105 :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
                               1134             105 :     assert(!(t->child->flags & SHORTER));
 4062                          1135             105 :     assert(begin <= end);
                               1136                 : 
                               1137                 :     MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
                               1138                 : 
                               1139                 :     /*
 2746 tgl                      1140 ECB             :      * For the moment, assume the minimum number of matches is 1.  If zero
                               1141                 :      * matches are allowed, and the target string is empty, we are allowed to
                               1142                 :      * match regardless of the contents of the iter node --- but we would
                               1143                 :      * prefer to match once, so that capturing parens get set.  (An example of
                               1144                 :      * the concern here is a pattern like "()*\1", which historically this
                               1145                 :      * code has allowed to succeed.)  Therefore, we deal with the zero-matches
                               1146                 :      * case at the bottom, after failing to find any other way to match.
                               1147                 :      */
 4062 tgl                      1148 GIC         105 :     min_matches = t->min;
                               1149             105 :     if (min_matches <= 0)
                               1150              69 :         min_matches = 1;
                               1151                 : 
                               1152                 :     /*
                               1153                 :      * We need workspace to track the endpoints of each sub-match.  Normally
                               1154                 :      * we consider only nonzero-length sub-matches, so there can be at most
                               1155                 :      * end-begin of them.  However, if min is larger than that, we will also
 4062 tgl                      1156 ECB             :      * consider zero-length sub-matches in order to find enough matches.
                               1157                 :      *
                               1158                 :      * For convenience, endpts[0] contains the "begin" pointer and we store
                               1159                 :      * sub-match endpoints in endpts[1..max_matches].
                               1160                 :      */
 4062 tgl                      1161 GIC         105 :     max_matches = end - begin;
 2762                          1162             105 :     if (max_matches > t->max && t->max != DUPINF)
 4062 tgl                      1163 UIC           0 :         max_matches = t->max;
 4062 tgl                      1164 GIC         105 :     if (max_matches < min_matches)
                               1165              61 :         max_matches = min_matches;
                               1166             105 :     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
                               1167             105 :     if (endpts == NULL)
 4062 tgl                      1168 UIC           0 :         return REG_ESPACE;
 4062 tgl                      1169 CBC         105 :     endpts[0] = begin;
 4062 tgl                      1170 ECB             : 
  778 tgl                      1171 GBC         105 :     d = getsubdfa(v, t->child);
 4062 tgl                      1172 CBC         105 :     if (ISERR())
 4062 tgl                      1173 ECB             :     {
 4062 tgl                      1174 LBC           0 :         FREE(endpts);
                               1175               0 :         return v->err;
 4062 tgl                      1176 EUB             :     }
 4062 tgl                      1177 ECB             : 
                               1178                 :     /*
                               1179                 :      * Our strategy is to first find a set of sub-match endpoints that are
                               1180                 :      * valid according to the child node's DFA, and then recursively dissect
                               1181                 :      * each sub-match to confirm validity.  If any validity check fails,
  597 tgl                      1182 EUB             :      * backtrack that sub-match and try again.  And, when we next try for a
                               1183                 :      * validity check, we need not recheck any successfully verified
                               1184                 :      * sub-matches that we didn't move the endpoints of.  nverified remembers
                               1185                 :      * how many sub-matches are currently known okay.
                               1186                 :      */
                               1187                 : 
                               1188                 :     /* initialize to consider first sub-match */
 4062 tgl                      1189 GIC         105 :     nverified = 0;
                               1190             105 :     k = 1;
                               1191             105 :     limit = end;
                               1192                 : 
                               1193                 :     /* iterate until satisfaction or failure */
                               1194             479 :     while (k > 0)
                               1195                 :     {
                               1196                 :         /* try to find an endpoint for the k'th sub-match */
 4062 tgl                      1197 CBC         395 :         endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
 2746                          1198             395 :         if (ISERR())
 2746 tgl                      1199 ECB             :         {
 2746 tgl                      1200 UIC           0 :             FREE(endpts);
                               1201               0 :             return v->err;
 2746 tgl                      1202 ECB             :         }
 4062 tgl                      1203 GIC         395 :         if (endpts[k] == NULL)
                               1204                 :         {
 4062 tgl                      1205 ECB             :             /* no match possible, so see if we can shorten previous one */
 4062 tgl                      1206 CBC         200 :             k--;
 4062 tgl                      1207 GIC         200 :             goto backtrack;
 4062 tgl                      1208 EUB             :         }
                               1209                 :         MDEBUG(("%d: working endpoint %d: %ld\n",
                               1210                 :                 t->id, k, LOFF(endpts[k])));
 4062 tgl                      1211 ECB             : 
                               1212                 :         /* k'th sub-match can no longer be considered verified */
 4062 tgl                      1213 GIC         195 :         if (nverified >= k)
 4062 tgl                      1214 CBC           8 :             nverified = k - 1;
 4062 tgl                      1215 ECB             : 
 4062 tgl                      1216 GIC         195 :         if (endpts[k] != end)
                               1217                 :         {
                               1218                 :             /* haven't reached end yet, try another iteration if allowed */
                               1219             134 :             if (k >= max_matches)
                               1220                 :             {
 4062 tgl                      1221 ECB             :                 /* must try to shorten some previous match */
 4062 tgl                      1222 LBC           0 :                 k--;
 4062 tgl                      1223 UIC           0 :                 goto backtrack;
 4062 tgl                      1224 ECB             :             }
                               1225                 : 
                               1226                 :             /* reject zero-length match unless necessary to achieve min */
 4062 tgl                      1227 CBC         134 :             if (endpts[k] == endpts[k - 1] &&
 4062 tgl                      1228 UIC           0 :                 (k >= min_matches || min_matches - k < end - endpts[k]))
                               1229               0 :                 goto backtrack;
 4062 tgl                      1230 EUB             : 
 4062 tgl                      1231 GBC         134 :             k++;
 4062 tgl                      1232 GIC         134 :             limit = end;
                               1233             134 :             continue;
                               1234                 :         }
 4062 tgl                      1235 ECB             : 
 4062 tgl                      1236 EUB             :         /*
 3955 bruce                    1237                 :          * We've identified a way to divide the string into k sub-matches that
                               1238                 :          * works so far as the child DFA can tell.  If k is an allowed number
 3955 bruce                    1239 ECB             :          * of matches, start the slow part: recurse to verify each sub-match.
                               1240                 :          * We always have k <= max_matches, needn't check that.
 4062 tgl                      1241                 :          */
 4062 tgl                      1242 GIC          61 :         if (k < min_matches)
 4062 tgl                      1243 UIC           0 :             goto backtrack;
                               1244                 : 
                               1245                 :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
                               1246                 : 
 4062 tgl                      1247 GIC         100 :         for (i = nverified + 1; i <= k; i++)
                               1248                 :         {
                               1249                 :             /* zap any match data from a non-last iteration */
  778 tgl                      1250 CBC          79 :             zaptreesubs(v, t->child);
  778 tgl                      1251 GBC          79 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
 4062 tgl                      1252 GIC          79 :             if (er == REG_OKAY)
                               1253                 :             {
                               1254              39 :                 nverified = i;
 4062 tgl                      1255 CBC          39 :                 continue;
                               1256                 :             }
 4062 tgl                      1257 GIC          40 :             if (er == REG_NOMATCH)
 4062 tgl                      1258 CBC          40 :                 break;
 4062 tgl                      1259 ECB             :             /* oops, something failed */
 4062 tgl                      1260 LBC           0 :             FREE(endpts);
 4062 tgl                      1261 UIC           0 :             return er;
 4062 tgl                      1262 ECB             :         }
                               1263                 : 
 4062 tgl                      1264 GIC          61 :         if (i > k)
 4062 tgl                      1265 ECB             :         {
                               1266                 :             /* satisfaction */
                               1267                 :             MDEBUG(("%d: successful\n", t->id));
 4062 tgl                      1268 GBC          21 :             FREE(endpts);
                               1269              21 :             return REG_OKAY;
                               1270                 :         }
                               1271                 : 
  597 tgl                      1272 ECB             :         /* i'th match failed to verify, so backtrack it */
  597 tgl                      1273 GIC          40 :         k = i;
                               1274                 : 
 4062                          1275             240 : backtrack:
 3955 bruce                    1276 ECB             : 
 4062 tgl                      1277                 :         /*
                               1278                 :          * Must consider shorter versions of the k'th sub-match.  However,
                               1279                 :          * we'll only ask for a zero-length match if necessary.
                               1280                 :          */
 4062 tgl                      1281 CBC         240 :         while (k > 0)
                               1282                 :         {
 3955 bruce                    1283             156 :             chr        *prev_end = endpts[k - 1];
                               1284                 : 
 4062 tgl                      1285 GIC         156 :             if (endpts[k] > prev_end)
                               1286                 :             {
                               1287             156 :                 limit = endpts[k] - 1;
                               1288             156 :                 if (limit > prev_end ||
 4062 tgl                      1289 LBC           0 :                     (k < min_matches && min_matches - k >= end - prev_end))
                               1290                 :                 {
 4062 tgl                      1291 ECB             :                     /* break out of backtrack loop, continue the outer one */
                               1292                 :                     break;
                               1293                 :                 }
                               1294                 :             }
                               1295                 :             /* can't shorten k'th sub-match any more, consider previous one */
 4062 tgl                      1296 LBC           0 :             k--;
 4062 tgl                      1297 EUB             :         }
                               1298                 :     }
                               1299                 : 
                               1300                 :     /* all possibilities exhausted */
 4062 tgl                      1301 GIC          84 :     FREE(endpts);
                               1302                 : 
                               1303                 :     /*
 2746 tgl                      1304 EUB             :      * Now consider the possibility that we can match to a zero-length string
                               1305                 :      * by using zero repetitions.
                               1306                 :      */
 2746 tgl                      1307 GIC          84 :     if (t->min == 0 && begin == end)
                               1308                 :     {
  781 tgl                      1309 ECB             :         MDEBUG(("%d: allowing zero matches\n", t->id));
 2746 tgl                      1310 GIC          56 :         return REG_OKAY;
                               1311                 :     }
                               1312                 : 
                               1313                 :     MDEBUG(("%d: failed\n", t->id));
 4062                          1314              28 :     return REG_NOMATCH;
 4062 tgl                      1315 ECB             : }
                               1316                 : 
                               1317                 : /*
                               1318                 :  * creviterdissect - dissect match for iteration node, shortest-first
                               1319                 :  */
                               1320                 : static int                      /* regexec return code */
 2118 tgl                      1321 GIC           7 : creviterdissect(struct vars *v,
 2118 tgl                      1322 ECB             :                 struct subre *t,
                               1323                 :                 chr *begin,     /* beginning of relevant substring */
                               1324                 :                 chr *end)       /* end of same */
                               1325                 : {
                               1326                 :     struct dfa *d;
                               1327                 :     chr       **endpts;
                               1328                 :     chr        *limit;
 4062                          1329                 :     int         min_matches;
                               1330                 :     size_t      max_matches;
                               1331                 :     int         nverified;
                               1332                 :     int         k;
                               1333                 :     int         i;
                               1334                 :     int         er;
                               1335                 : 
 4062 tgl                      1336 GIC           7 :     assert(t->op == '*');
  778                          1337               7 :     assert(t->child != NULL && t->child->cnfa.nstates > 0);
                               1338               7 :     assert(t->child->flags & SHORTER);
 4062                          1339               7 :     assert(begin <= end);
                               1340                 : 
                               1341                 :     MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end)));
                               1342                 : 
                               1343                 :     /*
 4062 tgl                      1344 ECB             :      * If zero matches are allowed, and target string is empty, just declare
                               1345                 :      * victory.  OTOH, if target string isn't empty, zero matches can't work
                               1346                 :      * so we pretend the min is 1.
                               1347                 :      */
 4062 tgl                      1348 GIC           7 :     min_matches = t->min;
                               1349               7 :     if (min_matches <= 0)
                               1350                 :     {
                               1351               7 :         if (begin == end)
                               1352                 :         {
                               1353                 :             MDEBUG(("%d: allowing zero matches\n", t->id));
                               1354               1 :             return REG_OKAY;
                               1355                 :         }
 4062 tgl                      1356 CBC           6 :         min_matches = 1;
 4062 tgl                      1357 ECB             :     }
                               1358                 : 
                               1359                 :     /*
                               1360                 :      * We need workspace to track the endpoints of each sub-match.  Normally
                               1361                 :      * we consider only nonzero-length sub-matches, so there can be at most
                               1362                 :      * end-begin of them.  However, if min is larger than that, we will also
                               1363                 :      * consider zero-length sub-matches in order to find enough matches.
                               1364                 :      *
                               1365                 :      * For convenience, endpts[0] contains the "begin" pointer and we store
                               1366                 :      * sub-match endpoints in endpts[1..max_matches].
                               1367                 :      */
 4062 tgl                      1368 GIC           6 :     max_matches = end - begin;
 2762                          1369               6 :     if (max_matches > t->max && t->max != DUPINF)
 4062                          1370               6 :         max_matches = t->max;
                               1371               6 :     if (max_matches < min_matches)
 4062 tgl                      1372 UIC           0 :         max_matches = min_matches;
 4062 tgl                      1373 GIC           6 :     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
                               1374               6 :     if (endpts == NULL)
 4062 tgl                      1375 UIC           0 :         return REG_ESPACE;
 4062 tgl                      1376 CBC           6 :     endpts[0] = begin;
 4062 tgl                      1377 ECB             : 
  778 tgl                      1378 CBC           6 :     d = getsubdfa(v, t->child);
 4062                          1379               6 :     if (ISERR())
 4062 tgl                      1380 EUB             :     {
 4062 tgl                      1381 LBC           0 :         FREE(endpts);
                               1382               0 :         return v->err;
 4062 tgl                      1383 EUB             :     }
 4062 tgl                      1384 ECB             : 
                               1385                 :     /*
                               1386                 :      * Our strategy is to first find a set of sub-match endpoints that are
                               1387                 :      * valid according to the child node's DFA, and then recursively dissect
                               1388                 :      * each sub-match to confirm validity.  If any validity check fails,
  597 tgl                      1389 EUB             :      * backtrack that sub-match and try again.  And, when we next try for a
                               1390                 :      * validity check, we need not recheck any successfully verified
                               1391                 :      * sub-matches that we didn't move the endpoints of.  nverified remembers
                               1392                 :      * how many sub-matches are currently known okay.
                               1393                 :      */
                               1394                 : 
                               1395                 :     /* initialize to consider first sub-match */
 4062 tgl                      1396 GIC           6 :     nverified = 0;
                               1397               6 :     k = 1;
                               1398               6 :     limit = begin;
                               1399                 : 
                               1400                 :     /* iterate until satisfaction or failure */
                               1401               8 :     while (k > 0)
                               1402                 :     {
                               1403                 :         /* disallow zero-length match unless necessary to achieve min */
 4062 tgl                      1404 CBC           6 :         if (limit == endpts[k - 1] &&
                               1405               6 :             limit != end &&
 4062 tgl                      1406 LBC           0 :             (k >= min_matches || min_matches - k < end - limit))
 4062 tgl                      1407 GIC           6 :             limit++;
                               1408                 : 
 3120 tgl                      1409 ECB             :         /* if this is the last allowed sub-match, it must reach to the end */
 3120 tgl                      1410 GIC           6 :         if (k >= max_matches)
                               1411               6 :             limit = end;
 3120 tgl                      1412 ECB             : 
 4062                          1413                 :         /* try to find an endpoint for the k'th sub-match */
 4062 tgl                      1414 GBC           6 :         endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
 4062 tgl                      1415 ECB             :                              (chr **) NULL, (int *) NULL);
 2746 tgl                      1416 GIC           6 :         if (ISERR())
                               1417                 :         {
 2746 tgl                      1418 LBC           0 :             FREE(endpts);
                               1419               0 :             return v->err;
                               1420                 :         }
 4062 tgl                      1421 GIC           6 :         if (endpts[k] == NULL)
 4062 tgl                      1422 ECB             :         {
                               1423                 :             /* no match possible, so see if we can lengthen previous one */
 4062 tgl                      1424 LBC           0 :             k--;
 4062 tgl                      1425 UIC           0 :             goto backtrack;
 4062 tgl                      1426 EUB             :         }
                               1427                 :         MDEBUG(("%d: working endpoint %d: %ld\n",
                               1428                 :                 t->id, k, LOFF(endpts[k])));
 4062 tgl                      1429 ECB             : 
                               1430                 :         /* k'th sub-match can no longer be considered verified */
 4062 tgl                      1431 GIC           6 :         if (nverified >= k)
 4062 tgl                      1432 UBC           0 :             nverified = k - 1;
 4062 tgl                      1433 EUB             : 
 4062 tgl                      1434 GIC           6 :         if (endpts[k] != end)
                               1435                 :         {
                               1436                 :             /* haven't reached end yet, try another iteration if allowed */
 4062 tgl                      1437 UIC           0 :             if (k >= max_matches)
                               1438                 :             {
 4062 tgl                      1439 ECB             :                 /* must try to lengthen some previous match */
 4062 tgl                      1440 UBC           0 :                 k--;
 4062 tgl                      1441 UIC           0 :                 goto backtrack;
 4062 tgl                      1442 ECB             :             }
                               1443                 : 
 4062 tgl                      1444 UIC           0 :             k++;
 4062 tgl                      1445 UBC           0 :             limit = endpts[k - 1];
 4062 tgl                      1446 UIC           0 :             continue;
                               1447                 :         }
 4062 tgl                      1448 EUB             : 
                               1449                 :         /*
                               1450                 :          * We've identified a way to divide the string into k sub-matches that
                               1451                 :          * works so far as the child DFA can tell.  If k is an allowed number
 3955 bruce                    1452                 :          * of matches, start the slow part: recurse to verify each sub-match.
                               1453                 :          * We always have k <= max_matches, needn't check that.
 4062 tgl                      1454                 :          */
 4062 tgl                      1455 GIC           6 :         if (k < min_matches)
 4062 tgl                      1456 UIC           0 :             goto backtrack;
                               1457                 : 
                               1458                 :         MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
                               1459                 : 
 4062 tgl                      1460 GIC          10 :         for (i = nverified + 1; i <= k; i++)
                               1461                 :         {
                               1462                 :             /* zap any match data from a non-last iteration */
  778 tgl                      1463 CBC           6 :             zaptreesubs(v, t->child);
  778 tgl                      1464 GBC           6 :             er = cdissect(v, t->child, endpts[i - 1], endpts[i]);
 4062 tgl                      1465 GIC           6 :             if (er == REG_OKAY)
                               1466                 :             {
                               1467               4 :                 nverified = i;
 4062 tgl                      1468 CBC           4 :                 continue;
                               1469                 :             }
 4062 tgl                      1470 GIC           2 :             if (er == REG_NOMATCH)
 4062 tgl                      1471 CBC           2 :                 break;
 4062 tgl                      1472 ECB             :             /* oops, something failed */
 4062 tgl                      1473 LBC           0 :             FREE(endpts);
 4062 tgl                      1474 UIC           0 :             return er;
 4062 tgl                      1475 ECB             :         }
                               1476                 : 
 4062 tgl                      1477 GIC           6 :         if (i > k)
 4062 tgl                      1478 ECB             :         {
                               1479                 :             /* satisfaction */
                               1480                 :             MDEBUG(("%d: successful\n", t->id));
 4062 tgl                      1481 GBC           4 :             FREE(endpts);
                               1482               4 :             return REG_OKAY;
                               1483                 :         }
                               1484                 : 
  597 tgl                      1485 ECB             :         /* i'th match failed to verify, so backtrack it */
  597 tgl                      1486 GIC           2 :         k = i;
                               1487                 : 
 4062                          1488               2 : backtrack:
 3955 bruce                    1489 ECB             : 
 4062 tgl                      1490                 :         /*
                               1491                 :          * Must consider longer versions of the k'th sub-match.
                               1492                 :          */
 4062 tgl                      1493 GIC           4 :         while (k > 0)
 4062 tgl                      1494 ECB             :         {
 4062 tgl                      1495 GIC           2 :             if (endpts[k] < end)
 4062 tgl                      1496 ECB             :             {
 4062 tgl                      1497 UIC           0 :                 limit = endpts[k] + 1;
                               1498                 :                 /* break out of backtrack loop, continue the outer one */
                               1499               0 :                 break;
                               1500                 :             }
 4062 tgl                      1501 ECB             :             /* can't lengthen k'th sub-match any more, consider previous one */
 4062 tgl                      1502 GIC           2 :             k--;
 4062 tgl                      1503 ECB             :         }
                               1504                 :     }
 4062 tgl                      1505 EUB             : 
                               1506                 :     /* all possibilities exhausted */
  781                          1507                 :     MDEBUG(("%d: failed\n", t->id));
 4062 tgl                      1508 GIC           2 :     FREE(endpts);
                               1509               2 :     return REG_NOMATCH;
 4062 tgl                      1510 ECB             : }
                               1511                 : 
                               1512                 : 
                               1513                 : 
                               1514                 : #include "rege_dfa.c"
        

Generated by: LCOV version v1.16-55-g56c0a2a