LCOV - differential code coverage report
Current view: top level - src/test/modules/test_regex - test_regex.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 86.5 % 311 269 25 15 2 24 98 147 16 94 3
Current Date: 2023-04-08 15:15:32 Functions: 100.0 % 9 9 5 1 3 5
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*--------------------------------------------------------------------------
       2                 :  *
       3                 :  * test_regex.c
       4                 :  *      Test harness for the regular expression package.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *      src/test/modules/test_regex/test_regex.c
      11                 :  *
      12                 :  * -------------------------------------------------------------------------
      13                 :  */
      14                 : 
      15                 : #include "postgres.h"
      16                 : 
      17                 : #include "funcapi.h"
      18                 : #include "miscadmin.h"
      19                 : #include "regex/regex.h"
      20                 : #include "utils/array.h"
      21                 : #include "utils/builtins.h"
      22                 : 
      23 CBC           2 : PG_MODULE_MAGIC;
      24                 : 
      25                 : 
      26                 : /* all the options of interest for regex functions */
      27                 : typedef struct test_re_flags
      28                 : {
      29                 :     int         cflags;         /* compile flags for Spencer's regex code */
      30                 :     int         eflags;         /* execute flags for Spencer's regex code */
      31                 :     long        info;           /* expected re_info bits */
      32                 :     bool        glob;           /* do it globally (for each occurrence) */
      33                 :     bool        indices;        /* report indices not actual strings */
      34                 :     bool        partial;        /* expect partial match */
      35                 : } test_re_flags;
      36                 : 
      37                 : /* cross-call state for test_regex() */
      38                 : typedef struct test_regex_ctx
      39                 : {
      40                 :     test_re_flags re_flags;     /* flags */
      41                 :     rm_detail_t details;        /* "details" from execution */
      42                 :     text       *orig_str;       /* data string in original TEXT form */
      43                 :     int         nmatches;       /* number of places where pattern matched */
      44                 :     int         npatterns;      /* number of capturing subpatterns */
      45                 :     /* We store start char index and end+1 char index for each match */
      46                 :     /* so the number of entries in match_locs is nmatches * npatterns * 2 */
      47                 :     int        *match_locs;     /* 0-based character indexes */
      48                 :     int         next_match;     /* 0-based index of next match to process */
      49                 :     /* workspace for build_test_match_result() */
      50                 :     Datum      *elems;          /* has npatterns+1 elements */
      51                 :     bool       *nulls;          /* has npatterns+1 elements */
      52                 :     pg_wchar   *wide_str;       /* wide-char version of original string */
      53                 :     char       *conv_buf;       /* conversion buffer, if needed */
      54                 :     int         conv_bufsiz;    /* size thereof */
      55                 : } test_regex_ctx;
      56                 : 
      57                 : /* Local functions */
      58                 : static void test_re_compile(text *text_re, int cflags, Oid collation,
      59                 :                             regex_t *result_re);
      60                 : static void parse_test_flags(test_re_flags *flags, text *opts);
      61                 : static test_regex_ctx *setup_test_matches(text *orig_str,
      62                 :                                           regex_t *cpattern,
      63                 :                                           test_re_flags *re_flags,
      64                 :                                           Oid collation,
      65                 :                                           bool use_subpatterns);
      66                 : static ArrayType *build_test_info_result(regex_t *cpattern,
      67                 :                                          test_re_flags *flags);
      68                 : static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
      69                 : 
      70                 : 
      71                 : /*
      72                 :  * test_regex(pattern text, string text, flags text) returns setof text[]
      73                 :  *
      74                 :  * This is largely based on regexp.c's regexp_matches, with additions
      75                 :  * for debugging purposes.
      76                 :  */
      77               3 : PG_FUNCTION_INFO_V1(test_regex);
      78                 : 
      79                 : Datum
      80            1762 : test_regex(PG_FUNCTION_ARGS)
      81                 : {
      82                 :     FuncCallContext *funcctx;
      83                 :     test_regex_ctx *matchctx;
      84                 :     ArrayType  *result_ary;
      85                 : 
      86            1762 :     if (SRF_IS_FIRSTCALL())
      87                 :     {
      88             694 :         text       *pattern = PG_GETARG_TEXT_PP(0);
      89             694 :         text       *flags = PG_GETARG_TEXT_PP(2);
      90             694 :         Oid         collation = PG_GET_COLLATION();
      91                 :         test_re_flags re_flags;
      92                 :         regex_t     cpattern;
      93                 :         MemoryContext oldcontext;
      94                 : 
      95             694 :         funcctx = SRF_FIRSTCALL_INIT();
      96             694 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
      97                 : 
      98                 :         /* Determine options */
      99             694 :         parse_test_flags(&re_flags, flags);
     100                 : 
     101                 :         /* set up the compiled pattern */
     102             694 :         test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
     103                 : 
     104                 :         /* be sure to copy the input string into the multi-call ctx */
     105             588 :         matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
     106                 :                                       &re_flags,
     107                 :                                       collation,
     108                 :                                       true);
     109                 : 
     110                 :         /* Pre-create workspace that build_test_match_result needs */
     111            1176 :         matchctx->elems = (Datum *) palloc(sizeof(Datum) *
     112             588 :                                            (matchctx->npatterns + 1));
     113            1176 :         matchctx->nulls = (bool *) palloc(sizeof(bool) *
     114             588 :                                           (matchctx->npatterns + 1));
     115                 : 
     116             588 :         MemoryContextSwitchTo(oldcontext);
     117             588 :         funcctx->user_fctx = (void *) matchctx;
     118                 : 
     119                 :         /*
     120                 :          * Return the first result row, which is info equivalent to Tcl's
     121                 :          * "regexp -about" output
     122                 :          */
     123             588 :         result_ary = build_test_info_result(&cpattern, &re_flags);
     124                 : 
     125             588 :         pg_regfree(&cpattern);
     126                 : 
     127             588 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     128                 :     }
     129                 :     else
     130                 :     {
     131                 :         /* Each subsequent row describes one match */
     132            1068 :         funcctx = SRF_PERCALL_SETUP();
     133            1068 :         matchctx = (test_regex_ctx *) funcctx->user_fctx;
     134                 : 
     135            1068 :         if (matchctx->next_match < matchctx->nmatches)
     136                 :         {
     137             480 :             result_ary = build_test_match_result(matchctx);
     138             480 :             matchctx->next_match++;
     139             480 :             SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
     140                 :         }
     141                 :     }
     142                 : 
     143             588 :     SRF_RETURN_DONE(funcctx);
     144                 : }
     145                 : 
     146                 : 
     147                 : /*
     148                 :  * test_re_compile - compile a RE
     149                 :  *
     150                 :  *  text_re --- the pattern, expressed as a TEXT object
     151                 :  *  cflags --- compile options for the pattern
     152                 :  *  collation --- collation to use for LC_CTYPE-dependent behavior
     153                 :  *  result_re --- output, compiled RE is stored here
     154                 :  *
     155                 :  * Pattern is given in the database encoding.  We internally convert to
     156                 :  * an array of pg_wchar, which is what Spencer's regex package wants.
     157                 :  *
     158                 :  * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
     159                 :  */
     160                 : static void
     161             694 : test_re_compile(text *text_re, int cflags, Oid collation,
     162                 :                 regex_t *result_re)
     163                 : {
     164             694 :     int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
     165             694 :     char       *text_re_val = VARDATA_ANY(text_re);
     166                 :     pg_wchar   *pattern;
     167                 :     int         pattern_len;
     168                 :     int         regcomp_result;
     169                 :     char        errMsg[100];
     170                 : 
     171                 :     /* Convert pattern string to wide characters */
     172             694 :     pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
     173             694 :     pattern_len = pg_mb2wchar_with_len(text_re_val,
     174                 :                                        pattern,
     175                 :                                        text_re_len);
     176                 : 
     177             694 :     regcomp_result = pg_regcomp(result_re,
     178                 :                                 pattern,
     179                 :                                 pattern_len,
     180                 :                                 cflags,
     181                 :                                 collation);
     182                 : 
     183             694 :     pfree(pattern);
     184                 : 
     185             694 :     if (regcomp_result != REG_OKAY)
     186                 :     {
     187                 :         /* re didn't compile (no need for pg_regfree, if so) */
     188 GIC         106 :         pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
     189             106 :         ereport(ERROR,
     190                 :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     191                 :                  errmsg("invalid regular expression: %s", errMsg)));
     192                 :     }
     193 CBC         588 : }
     194                 : 
     195                 : /*
     196                 :  * test_re_execute - execute a RE on pg_wchar data
     197                 :  *
     198                 :  * Returns true on match, false on no match
     199                 :  * Arguments are as for pg_regexec
     200                 :  */
     201                 : static bool
     202 GIC         588 : test_re_execute(regex_t *re, pg_wchar *data, int data_len,
     203 ECB             :                 int start_search,
     204                 :                 rm_detail_t *details,
     205                 :                 int nmatch, regmatch_t *pmatch,
     206                 :                 int eflags)
     207                 : {
     208                 :     int         regexec_result;
     209                 :     char        errMsg[100];
     210                 : 
     211                 :     /* Initialize match locations in case engine doesn't */
     212 CBC         588 :     details->rm_extend.rm_so = -1;
     213 GIC         588 :     details->rm_extend.rm_eo = -1;
     214            1462 :     for (int i = 0; i < nmatch; i++)
     215                 :     {
     216             874 :         pmatch[i].rm_so = -1;
     217             874 :         pmatch[i].rm_eo = -1;
     218                 :     }
     219                 : 
     220                 :     /* Perform RE match and return result */
     221 CBC         588 :     regexec_result = pg_regexec(re,
     222                 :                                 data,
     223                 :                                 data_len,
     224 EUB             :                                 start_search,
     225                 :                                 details,
     226                 :                                 nmatch,
     227                 :                                 pmatch,
     228                 :                                 eflags);
     229                 : 
     230 CBC         588 :     if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
     231                 :     {
     232                 :         /* re failed??? */
     233 UIC           0 :         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
     234               0 :         ereport(ERROR,
     235                 :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     236                 :                  errmsg("regular expression failed: %s", errMsg)));
     237                 :     }
     238                 : 
     239 GIC         588 :     return (regexec_result == REG_OKAY);
     240 ECB             : }
     241                 : 
     242                 : 
     243                 : /*
     244                 :  * parse_test_flags - parse the flags argument
     245                 :  *
     246                 :  *  flags --- output argument, filled with desired options
     247                 :  *  opts --- TEXT object, or NULL for defaults
     248                 :  */
     249                 : static void
     250 GIC         694 : parse_test_flags(test_re_flags *flags, text *opts)
     251 ECB             : {
     252                 :     /* these defaults must match Tcl's */
     253 CBC         694 :     int         cflags = REG_ADVANCED;
     254             694 :     int         eflags = 0;
     255 GIC         694 :     long        info = 0;
     256                 : 
     257 CBC         694 :     flags->glob = false;
     258 GIC         694 :     flags->indices = false;
     259 CBC         694 :     flags->partial = false;
     260                 : 
     261             694 :     if (opts)
     262                 :     {
     263             694 :         char       *opt_p = VARDATA_ANY(opts);
     264             694 :         int         opt_len = VARSIZE_ANY_EXHDR(opts);
     265 ECB             :         int         i;
     266                 : 
     267 CBC        1880 :         for (i = 0; i < opt_len; i++)
     268                 :         {
     269            1186 :             switch (opt_p[i])
     270 ECB             :             {
     271 CBC          78 :                 case '-':
     272 ECB             :                     /* allowed, no-op */
     273 GIC          78 :                     break;
     274               7 :                 case '!':
     275 GBC           7 :                     flags->partial = true;
     276               7 :                     break;
     277               1 :                 case '*':
     278 ECB             :                     /* test requires Unicode --- ignored here */
     279 CBC           1 :                     break;
     280              53 :                 case '0':
     281              53 :                     flags->indices = true;
     282              53 :                     break;
     283 ECB             : 
     284                 :                     /* These flags correspond to user-exposed RE options: */
     285 LBC           0 :                 case 'g':       /* global match */
     286               0 :                     flags->glob = true;
     287               0 :                     break;
     288 CBC          20 :                 case 'i':       /* case insensitive */
     289              20 :                     cflags |= REG_ICASE;
     290              20 :                     break;
     291              35 :                 case 'n':       /* \n affects ^ $ . [^ */
     292              35 :                     cflags |= REG_NEWLINE;
     293              35 :                     break;
     294               2 :                 case 'p':       /* ~Perl, \n affects . [^ */
     295 GIC           2 :                     cflags |= REG_NLSTOP;
     296               2 :                     cflags &= ~REG_NLANCH;
     297 CBC           2 :                     break;
     298               2 :                 case 'w':       /* weird, \n affects ^ $ only */
     299               2 :                     cflags &= ~REG_NLSTOP;
     300               2 :                     cflags |= REG_NLANCH;
     301               2 :                     break;
     302              14 :                 case 'x':       /* expanded syntax */
     303              14 :                     cflags |= REG_EXPANDED;
     304 GIC          14 :                     break;
     305                 : 
     306                 :                     /* These flags correspond to Tcl's -xflags options: */
     307               2 :                 case 'a':
     308               2 :                     cflags |= REG_ADVF;
     309               2 :                     break;
     310             131 :                 case 'b':
     311             131 :                     cflags &= ~REG_ADVANCED;
     312             131 :                     break;
     313 CBC          11 :                 case 'c':
     314 ECB             : 
     315                 :                     /*
     316                 :                      * Tcl calls this TCL_REG_CANMATCH, but it's really
     317                 :                      * REG_EXPECT.  In this implementation we must also set
     318                 :                      * the partial and indices flags, so that
     319                 :                      * setup_test_matches and build_test_match_result will
     320                 :                      * emit the desired data.  (They'll emit more fields than
     321                 :                      * Tcl would, but that's fine.)
     322                 :                      */
     323 CBC          11 :                     cflags |= REG_EXPECT;
     324              11 :                     flags->partial = true;
     325              11 :                     flags->indices = true;
     326              11 :                     break;
     327              10 :                 case 'e':
     328              10 :                     cflags &= ~REG_ADVANCED;
     329              10 :                     cflags |= REG_EXTENDED;
     330              10 :                     break;
     331               6 :                 case 'q':
     332               6 :                     cflags &= ~REG_ADVANCED;
     333               6 :                     cflags |= REG_QUOTE;
     334 GBC           6 :                     break;
     335               2 :                 case 'o':       /* o for opaque */
     336               2 :                     cflags |= REG_NOSUB;
     337               2 :                     break;
     338               2 :                 case 's':       /* s for start */
     339               2 :                     cflags |= REG_BOSONLY;
     340               2 :                     break;
     341               6 :                 case '+':
     342               6 :                     cflags |= REG_FAKE;
     343               6 :                     break;
     344 UBC           0 :                 case ',':
     345               0 :                     cflags |= REG_PROGRESS;
     346 LBC           0 :                     break;
     347               0 :                 case '.':
     348               0 :                     cflags |= REG_DUMP;
     349               0 :                     break;
     350               0 :                 case ':':
     351               0 :                     eflags |= REG_MTRACE;
     352               0 :                     break;
     353               0 :                 case ';':
     354               0 :                     eflags |= REG_FTRACE;
     355               0 :                     break;
     356 CBC           6 :                 case '^':
     357               6 :                     eflags |= REG_NOTBOL;
     358 GIC           6 :                     break;
     359               4 :                 case '$':
     360 CBC           4 :                     eflags |= REG_NOTEOL;
     361               4 :                     break;
     362              17 :                 case 't':
     363              17 :                     cflags |= REG_EXPECT;
     364              17 :                     break;
     365               5 :                 case '%':
     366               5 :                     eflags |= REG_SMALL;
     367               5 :                     break;
     368 ECB             : 
     369                 :                     /* These flags define expected info bits: */
     370 CBC           5 :                 case 'A':
     371               5 :                     info |= REG_UBSALNUM;
     372               5 :                     break;
     373               4 :                 case 'B':
     374               4 :                     info |= REG_UBRACES;
     375               4 :                     break;
     376              40 :                 case 'E':
     377              40 :                     info |= REG_UBBS;
     378              40 :                     break;
     379              34 :                 case 'H':
     380              34 :                     info |= REG_ULOOKAROUND;
     381              34 :                     break;
     382              10 :                 case 'I':
     383              10 :                     info |= REG_UIMPOSSIBLE;
     384              10 :                     break;
     385             162 :                 case 'L':
     386             162 :                     info |= REG_ULOCALE;
     387             162 :                     break;
     388              43 :                 case 'M':
     389              43 :                     info |= REG_UUNPORT;
     390              43 :                     break;
     391              46 :                 case 'N':
     392              46 :                     info |= REG_UEMPTYMATCH;
     393              46 :                     break;
     394             305 :                 case 'P':
     395             305 :                     info |= REG_UNONPOSIX;
     396             305 :                     break;
     397              35 :                 case 'Q':
     398              35 :                     info |= REG_UBOUNDS;
     399              35 :                     break;
     400              42 :                 case 'R':
     401              42 :                     info |= REG_UBACKREF;
     402 GIC          42 :                     break;
     403 GBC          25 :                 case 'S':
     404              25 :                     info |= REG_UUNSPEC;
     405 GIC          25 :                     break;
     406              20 :                 case 'T':
     407              20 :                     info |= REG_USHORTEST;
     408              20 :                     break;
     409               1 :                 case 'U':
     410               1 :                     info |= REG_UPBOTCH;
     411               1 :                     break;
     412 ECB             : 
     413 LBC           0 :                 default:
     414               0 :                     ereport(ERROR,
     415 ECB             :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     416                 :                              errmsg("invalid regular expression test option: \"%.*s\"",
     417                 :                                     pg_mblen(opt_p + i), opt_p + i)));
     418                 :                     break;
     419                 :             }
     420                 :         }
     421                 :     }
     422 GIC         694 :     flags->cflags = cflags;
     423             694 :     flags->eflags = eflags;
     424             694 :     flags->info = info;
     425 CBC         694 : }
     426                 : 
     427                 : /*
     428                 :  * setup_test_matches --- do the initial matching
     429                 :  *
     430 ECB             :  * To simplify memory management, we do all the matching in one swoop.
     431                 :  * The returned test_regex_ctx contains the locations of all the substrings
     432                 :  * matching the pattern.
     433                 :  */
     434                 : static test_regex_ctx *
     435 GIC         588 : setup_test_matches(text *orig_str,
     436                 :                    regex_t *cpattern, test_re_flags *re_flags,
     437                 :                    Oid collation,
     438                 :                    bool use_subpatterns)
     439                 : {
     440             588 :     test_regex_ctx *matchctx = palloc0(sizeof(test_regex_ctx));
     441 CBC         588 :     int         eml = pg_database_encoding_max_length();
     442                 :     int         orig_len;
     443                 :     pg_wchar   *wide_str;
     444 ECB             :     int         wide_len;
     445                 :     regmatch_t *pmatch;
     446                 :     int         pmatch_len;
     447                 :     int         array_len;
     448                 :     int         array_idx;
     449                 :     int         prev_match_end;
     450                 :     int         start_search;
     451 CBC         588 :     int         maxlen = 0;     /* largest fetch length in characters */
     452 ECB             : 
     453                 :     /* save flags */
     454 GIC         588 :     matchctx->re_flags = *re_flags;
     455 ECB             : 
     456                 :     /* save original string --- we'll extract result substrings from it */
     457 CBC         588 :     matchctx->orig_str = orig_str;
     458 ECB             : 
     459                 :     /* convert string to pg_wchar form for matching */
     460 GIC         588 :     orig_len = VARSIZE_ANY_EXHDR(orig_str);
     461             588 :     wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
     462 CBC         588 :     wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
     463 ECB             : 
     464                 :     /* do we want to remember subpatterns? */
     465 GIC         588 :     if (use_subpatterns && cpattern->re_nsub > 0)
     466                 :     {
     467             127 :         matchctx->npatterns = cpattern->re_nsub + 1;
     468 CBC         127 :         pmatch_len = cpattern->re_nsub + 1;
     469                 :     }
     470                 :     else
     471                 :     {
     472 GIC         461 :         use_subpatterns = false;
     473             461 :         matchctx->npatterns = 1;
     474             461 :         pmatch_len = 1;
     475                 :     }
     476 ECB             : 
     477                 :     /* temporary output space for RE package */
     478 CBC         588 :     pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
     479                 : 
     480                 :     /*
     481 ECB             :      * the real output space (grown dynamically if needed)
     482                 :      *
     483                 :      * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
     484                 :      * than at 2^27
     485                 :      */
     486 GIC         588 :     array_len = re_flags->glob ? 255 : 31;
     487             588 :     matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
     488             588 :     array_idx = 0;
     489                 : 
     490 ECB             :     /* search for the pattern, perhaps repeatedly */
     491 GIC         588 :     prev_match_end = 0;
     492 GBC         588 :     start_search = 0;
     493             588 :     while (test_re_execute(cpattern, wide_str, wide_len,
     494 EUB             :                            start_search,
     495                 :                            &matchctx->details,
     496                 :                            pmatch_len, pmatch,
     497                 :                            re_flags->eflags))
     498                 :     {
     499                 :         /* enlarge output space if needed */
     500 GIC         462 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     501                 :         {
     502 LBC           0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     503 UIC           0 :             if (array_len > MaxAllocSize / sizeof(int))
     504 LBC           0 :                 ereport(ERROR,
     505 ECB             :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     506                 :                          errmsg("too many regular expression matches")));
     507 LBC           0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     508 ECB             :                                                     sizeof(int) * array_len);
     509                 :         }
     510                 : 
     511                 :         /* save this match's locations */
     512 CBC        1092 :         for (int i = 0; i < matchctx->npatterns; i++)
     513 ECB             :         {
     514 GIC         630 :             int         so = pmatch[i].rm_so;
     515             630 :             int         eo = pmatch[i].rm_eo;
     516 ECB             : 
     517 CBC         630 :             matchctx->match_locs[array_idx++] = so;
     518 GIC         630 :             matchctx->match_locs[array_idx++] = eo;
     519             630 :             if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
     520             438 :                 maxlen = (eo - so);
     521                 :         }
     522             462 :         matchctx->nmatches++;
     523             462 :         prev_match_end = pmatch[0].rm_eo;
     524                 : 
     525 EUB             :         /* if not glob, stop after one match */
     526 GBC         462 :         if (!re_flags->glob)
     527             462 :             break;
     528 EUB             : 
     529                 :         /*
     530                 :          * Advance search position.  Normally we start the next search at the
     531                 :          * end of the previous match; but if the match was of zero length, we
     532                 :          * have to advance by one character, or we'd just find the same match
     533                 :          * again.
     534                 :          */
     535 UIC           0 :         start_search = prev_match_end;
     536 LBC           0 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
     537 UIC           0 :             start_search++;
     538               0 :         if (start_search > wide_len)
     539 LBC           0 :             break;
     540                 :     }
     541 EUB             : 
     542                 :     /*
     543                 :      * If we had no match, but "partial" and "indices" are set, emit the
     544                 :      * details.
     545                 :      */
     546 GBC         588 :     if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
     547                 :     {
     548                 :         /* enlarge output space if needed */
     549 GIC          18 :         while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
     550 ECB             :         {
     551 LBC           0 :             array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
     552 UIC           0 :             if (array_len > MaxAllocSize / sizeof(int))
     553 LBC           0 :                 ereport(ERROR,
     554                 :                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
     555 ECB             :                          errmsg("too many regular expression matches")));
     556 LBC           0 :             matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
     557                 :                                                     sizeof(int) * array_len);
     558 ECB             :         }
     559                 : 
     560 GIC          18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
     561 CBC          18 :         matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
     562                 :         /* we don't have pmatch data, so emit -1 */
     563              20 :         for (int i = 1; i < matchctx->npatterns; i++)
     564                 :         {
     565               2 :             matchctx->match_locs[array_idx++] = -1;
     566 GIC           2 :             matchctx->match_locs[array_idx++] = -1;
     567                 :         }
     568              18 :         matchctx->nmatches++;
     569                 :     }
     570                 : 
     571             588 :     Assert(array_idx <= array_len);
     572                 : 
     573             588 :     if (eml > 1)
     574                 :     {
     575             588 :         int64       maxsiz = eml * (int64) maxlen;
     576                 :         int         conv_bufsiz;
     577                 : 
     578 ECB             :         /*
     579                 :          * Make the conversion buffer large enough for any substring of
     580                 :          * interest.
     581                 :          *
     582                 :          * Worst case: assume we need the maximum size (maxlen*eml), but take
     583                 :          * advantage of the fact that the original string length in bytes is
     584                 :          * an upper bound on the byte length of any fetched substring (and we
     585                 :          * know that len+1 is safe to allocate because the varlena header is
     586                 :          * longer than 1 byte).
     587                 :          */
     588 GIC         588 :         if (maxsiz > orig_len)
     589             415 :             conv_bufsiz = orig_len + 1;
     590 EUB             :         else
     591 GBC         173 :             conv_bufsiz = maxsiz + 1;   /* safe since maxsiz < 2^30 */
     592 EUB             : 
     593 GBC         588 :         matchctx->conv_buf = palloc(conv_bufsiz);
     594 GIC         588 :         matchctx->conv_bufsiz = conv_bufsiz;
     595             588 :         matchctx->wide_str = wide_str;
     596                 :     }
     597 ECB             :     else
     598                 :     {
     599                 :         /* No need to keep the wide string if we're in a single-byte charset. */
     600 UIC           0 :         pfree(wide_str);
     601               0 :         matchctx->wide_str = NULL;
     602               0 :         matchctx->conv_buf = NULL;
     603               0 :         matchctx->conv_bufsiz = 0;
     604                 :     }
     605                 : 
     606                 :     /* Clean up temp storage */
     607 GIC         588 :     pfree(pmatch);
     608 ECB             : 
     609 GIC         588 :     return matchctx;
     610                 : }
     611                 : 
     612                 : /*
     613                 :  * build_test_info_result - build output array describing compiled regexp
     614                 :  *
     615                 :  * This borrows some code from Tcl's TclRegAbout().
     616                 :  */
     617                 : static ArrayType *
     618             588 : build_test_info_result(regex_t *cpattern, test_re_flags *flags)
     619                 : {
     620                 :     /* Translation data for flag bits in regex_t.re_info */
     621                 :     struct infoname
     622                 :     {
     623                 :         int         bit;
     624                 :         const char *text;
     625                 :     };
     626                 :     static const struct infoname infonames[] = {
     627                 :         {REG_UBACKREF, "REG_UBACKREF"},
     628                 :         {REG_ULOOKAROUND, "REG_ULOOKAROUND"},
     629                 :         {REG_UBOUNDS, "REG_UBOUNDS"},
     630                 :         {REG_UBRACES, "REG_UBRACES"},
     631                 :         {REG_UBSALNUM, "REG_UBSALNUM"},
     632                 :         {REG_UPBOTCH, "REG_UPBOTCH"},
     633                 :         {REG_UBBS, "REG_UBBS"},
     634                 :         {REG_UNONPOSIX, "REG_UNONPOSIX"},
     635 ECB             :         {REG_UUNSPEC, "REG_UUNSPEC"},
     636                 :         {REG_UUNPORT, "REG_UUNPORT"},
     637                 :         {REG_ULOCALE, "REG_ULOCALE"},
     638                 :         {REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
     639                 :         {REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
     640                 :         {REG_USHORTEST, "REG_USHORTEST"},
     641                 :         {0, NULL}
     642                 :     };
     643                 :     const struct infoname *inf;
     644                 :     Datum       elems[lengthof(infonames) + 1];
     645 CBC         588 :     int         nresults = 0;
     646                 :     char        buf[80];
     647 ECB             :     int         dims[1];
     648                 :     int         lbs[1];
     649                 : 
     650                 :     /* Set up results: first, the number of subexpressions */
     651 GIC         588 :     snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
     652             588 :     elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     653 EUB             : 
     654                 :     /* Report individual info bit states */
     655 GIC        8820 :     for (inf = infonames; inf->bit != 0; inf++)
     656                 :     {
     657            8232 :         if (cpattern->re_info & inf->bit)
     658                 :         {
     659 CBC         749 :             if (flags->info & inf->bit)
     660 GIC         749 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
     661 EUB             :             else
     662                 :             {
     663 UIC           0 :                 snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
     664               0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     665                 :             }
     666                 :         }
     667                 :         else
     668 ECB             :         {
     669 CBC        7483 :             if (flags->info & inf->bit)
     670                 :             {
     671 LBC           0 :                 snprintf(buf, sizeof(buf), "missing %s!", inf->text);
     672 UIC           0 :                 elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
     673                 :             }
     674                 :         }
     675                 :     }
     676                 : 
     677                 :     /* And form an array */
     678 GIC         588 :     dims[0] = nresults;
     679             588 :     lbs[0] = 1;
     680                 :     /* XXX: this hardcodes assumptions about the text type */
     681             588 :     return construct_md_array(elems, NULL, 1, dims, lbs,
     682 ECB             :                               TEXTOID, -1, false, TYPALIGN_INT);
     683                 : }
     684                 : 
     685                 : /*
     686                 :  * build_test_match_result - build output array for current match
     687                 :  *
     688                 :  * Note that if the indices flag is set, we don't need any strings,
     689                 :  * just the location data.
     690                 :  */
     691                 : static ArrayType *
     692 GIC         480 : build_test_match_result(test_regex_ctx *matchctx)
     693                 : {
     694             480 :     char       *buf = matchctx->conv_buf;
     695 CBC         480 :     Datum      *elems = matchctx->elems;
     696             480 :     bool       *nulls = matchctx->nulls;
     697 GIC         480 :     bool        indices = matchctx->re_flags.indices;
     698 ECB             :     char        bufstr[80];
     699                 :     int         dims[1];
     700                 :     int         lbs[1];
     701                 :     int         loc;
     702                 :     int         i;
     703                 : 
     704                 :     /* Extract matching substrings from the original string */
     705 GIC         480 :     loc = matchctx->next_match * matchctx->npatterns * 2;
     706 CBC        1130 :     for (i = 0; i < matchctx->npatterns; i++)
     707 ECB             :     {
     708 GIC         650 :         int         so = matchctx->match_locs[loc++];
     709 CBC         650 :         int         eo = matchctx->match_locs[loc++];
     710                 : 
     711             650 :         if (indices)
     712 ECB             :         {
     713                 :             /* Report eo this way for consistency with Tcl */
     714 CBC          84 :             snprintf(bufstr, sizeof(bufstr), "%d %d",
     715                 :                      so, so < 0 ? eo : eo - 1);
     716              84 :             elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     717 GIC          84 :             nulls[i] = false;
     718                 :         }
     719             566 :         else if (so < 0 || eo < 0)
     720 ECB             :         {
     721 CBC          12 :             elems[i] = (Datum) 0;
     722              12 :             nulls[i] = true;
     723                 :         }
     724 GIC         554 :         else if (buf)
     725                 :         {
     726 GBC         554 :             int         len = pg_wchar2mb_with_len(matchctx->wide_str + so,
     727                 :                                                    buf,
     728                 :                                                    eo - so);
     729                 : 
     730             554 :             Assert(len < matchctx->conv_bufsiz);
     731 GIC         554 :             elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
     732             554 :             nulls[i] = false;
     733                 :         }
     734                 :         else
     735 ECB             :         {
     736 UIC           0 :             elems[i] = DirectFunctionCall3(text_substr,
     737 ECB             :                                            PointerGetDatum(matchctx->orig_str),
     738                 :                                            Int32GetDatum(so + 1),
     739                 :                                            Int32GetDatum(eo - so));
     740 LBC           0 :             nulls[i] = false;
     741                 :         }
     742 ECB             :     }
     743                 : 
     744                 :     /* In EXPECT indices mode, also report the "details" */
     745 GIC         480 :     if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
     746                 :     {
     747              28 :         int         so = matchctx->details.rm_extend.rm_so;
     748 CBC          28 :         int         eo = matchctx->details.rm_extend.rm_eo;
     749 ECB             : 
     750 GIC          28 :         snprintf(bufstr, sizeof(bufstr), "%d %d",
     751 ECB             :                  so, so < 0 ? eo : eo - 1);
     752 GIC          28 :         elems[i] = PointerGetDatum(cstring_to_text(bufstr));
     753              28 :         nulls[i] = false;
     754              28 :         i++;
     755                 :     }
     756                 : 
     757                 :     /* And form an array */
     758             480 :     dims[0] = i;
     759             480 :     lbs[0] = 1;
     760                 :     /* XXX: this hardcodes assumptions about the text type */
     761             480 :     return construct_md_array(elems, nulls, 1, dims, lbs,
     762                 :                               TEXTOID, -1, false, TYPALIGN_INT);
     763                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a