LCOV - Differential Code Coverage HEAD vs 15 - src/backend/utils/adt/regexp.c

LCOV - differential code coverage report

Current view:	top level - src/backend/utils/adt - regexp.c (source / functions)		Coverage	Total	Hit	LBC	UIC	UBC	GBC	GIC	GNC	CBC	EUB	ECB	DCB
Current:	Differential Code Coverage HEAD vs 15	Lines:	89.5 %	649	581	19	38	11	23	371	13	174	34	380	8
Current Date:	2023-04-08 15:15:32	Functions:	98.0 %	51	50		1			50			1	50
Baseline:	15
Baseline Date:	2023-04-08 15:09:40
Legend:	Lines: hit not hit

           TLA  Line data    Source code

       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * regexp.c
       4                 :  *    Postgres' interface to the regular expression package.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  *
      10                 :  * IDENTIFICATION
      11                 :  *    src/backend/utils/adt/regexp.c
      12                 :  *
      13                 :  *      Alistair Crooks added the code for the regex caching
      14                 :  *      agc - cached the regular expressions used - there's a good chance
      15                 :  *      that we'll get a hit, so this saves a compile step for every
      16                 :  *      attempted match. I haven't actually measured the speed improvement,
      17                 :  *      but it `looks' a lot quicker visually when watching regression
      18                 :  *      test output.
      19                 :  *
      20                 :  *      agc - incorporated Keith Bostic's Berkeley regex code into
      21                 :  *      the tree for all ports. To distinguish this regex code from any that
      22                 :  *      is existent on a platform, I've prepended the string "pg_" to
      23                 :  *      the functions regcomp, regerror, regexec and regfree.
      24                 :  *      Fixed a bug that was originally a typo by me, where `i' was used
      25                 :  *      instead of `oldest' when compiling regular expressions - benign
      26                 :  *      results mostly, although occasionally it bit you...
      27                 :  *
      28                 :  *-------------------------------------------------------------------------
      29                 :  */
      30                 : #include "postgres.h"
      31                 : 
      32                 : #include "catalog/pg_type.h"
      33                 : #include "funcapi.h"
      34                 : #include "miscadmin.h"
      35                 : #include "regex/regex.h"
      36                 : #include "utils/array.h"
      37                 : #include "utils/builtins.h"
      38                 : #include "utils/memutils.h"
      39                 : #include "utils/varlena.h"
      40                 : 
      41                 : #define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
      42                 :     (PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
      43                 : 
      44                 : 
      45                 : /* all the options of interest for regex functions */
      46                 : typedef struct pg_re_flags
      47                 : {
      48                 :     int         cflags;         /* compile flags for Spencer's regex code */
      49                 :     bool        glob;           /* do it globally (for each occurrence) */
      50                 : } pg_re_flags;
      51                 : 
      52                 : /* cross-call state for regexp_match and regexp_split functions */
      53                 : typedef struct regexp_matches_ctx
      54                 : {
      55                 :     text       *orig_str;       /* data string in original TEXT form */
      56                 :     int         nmatches;       /* number of places where pattern matched */
      57                 :     int         npatterns;      /* number of capturing subpatterns */
      58                 :     /* We store start char index and end+1 char index for each match */
      59                 :     /* so the number of entries in match_locs is nmatches * npatterns * 2 */
      60                 :     int        *match_locs;     /* 0-based character indexes */
      61                 :     int         next_match;     /* 0-based index of next match to process */
      62                 :     /* workspace for build_regexp_match_result() */
      63                 :     Datum      *elems;          /* has npatterns elements */
      64                 :     bool       *nulls;          /* has npatterns elements */
      65                 :     pg_wchar   *wide_str;       /* wide-char version of original string */
      66                 :     char       *conv_buf;       /* conversion buffer, if needed */
      67                 :     int         conv_bufsiz;    /* size thereof */
      68                 : } regexp_matches_ctx;
      69                 : 
      70                 : /*
      71                 :  * We cache precompiled regular expressions using a "self organizing list"
      72                 :  * structure, in which recently-used items tend to be near the front.
      73                 :  * Whenever we use an entry, it's moved up to the front of the list.
      74                 :  * Over time, an item's average position corresponds to its frequency of use.
      75                 :  *
      76                 :  * When we first create an entry, it's inserted at the front of
      77                 :  * the array, dropping the entry at the end of the array if necessary to
      78                 :  * make room.  (This might seem to be weighting the new entry too heavily,
      79                 :  * but if we insert new entries further back, we'll be unable to adjust to
      80                 :  * a sudden shift in the query mix where we are presented with MAX_CACHED_RES
      81                 :  * never-before-seen items used circularly.  We ought to be able to handle
      82                 :  * that case, so we have to insert at the front.)
      83                 :  *
      84                 :  * Knuth mentions a variant strategy in which a used item is moved up just
      85                 :  * one place in the list.  Although he says this uses fewer comparisons on
      86                 :  * average, it seems not to adapt very well to the situation where you have
      87                 :  * both some reusable patterns and a steady stream of non-reusable patterns.
      88                 :  * A reusable pattern that isn't used at least as often as non-reusable
      89                 :  * patterns are seen will "fail to keep up" and will drop off the end of the
      90                 :  * cache.  With move-to-front, a reusable pattern is guaranteed to stay in
      91                 :  * the cache as long as it's used at least once in every MAX_CACHED_RES uses.
      92                 :  */
      93                 : 
      94                 : /* this is the maximum number of cached regular expressions */
      95                 : #ifndef MAX_CACHED_RES
      96                 : #define MAX_CACHED_RES  32
      97                 : #endif
      98                 : 
      99                 : /* A parent memory context for regular expressions. */
     100                 : static MemoryContext RegexpCacheMemoryContext;
     101                 : 
     102                 : /* this structure describes one cached regular expression */
     103                 : typedef struct cached_re_str
     104                 : {
     105                 :     MemoryContext cre_context;  /* memory context for this regexp */
     106                 :     char       *cre_pat;        /* original RE (not null terminated!) */
     107                 :     int         cre_pat_len;    /* length of original RE, in bytes */
     108                 :     int         cre_flags;      /* compile flags: extended,icase etc */
     109                 :     Oid         cre_collation;  /* collation to use */
     110                 :     regex_t     cre_re;         /* the compiled regular expression */
     111                 : } cached_re_str;
     112                 : 
     113                 : static int  num_res = 0;        /* # of cached re's */
     114                 : static cached_re_str re_array[MAX_CACHED_RES];  /* cached re's */
     115                 : 
     116                 : 
     117                 : /* Local functions */
     118                 : static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
     119                 :                                                 pg_re_flags *re_flags,
     120                 :                                                 int start_search,
     121                 :                                                 Oid collation,
     122                 :                                                 bool use_subpatterns,
     123                 :                                                 bool ignore_degenerate,
     124                 :                                                 bool fetching_unmatched);
     125                 : static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
     126                 : static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
     127                 : 
     128                 : 
     129                 : /*
     130                 :  * RE_compile_and_cache - compile a RE, caching if possible
     131                 :  *
     132                 :  * Returns regex_t *
     133                 :  *
     134                 :  *  text_re --- the pattern, expressed as a TEXT object
     135                 :  *  cflags --- compile options for the pattern
     136                 :  *  collation --- collation to use for LC_CTYPE-dependent behavior
     137                 :  *
     138                 :  * Pattern is given in the database encoding.  We internally convert to
     139                 :  * an array of pg_wchar, which is what Spencer's regex package wants.
     140                 :  */
     141                 : regex_t *
     142 GIC      459737 : RE_compile_and_cache(text *text_re, int cflags, Oid collation)
     143                 : {
     144          459737 :     int         text_re_len = VARSIZE_ANY_EXHDR(text_re);
     145          459737 :     char       *text_re_val = VARDATA_ANY(text_re);
     146 ECB             :     pg_wchar   *pattern;
     147                 :     int         pattern_len;
     148                 :     int         i;
     149                 :     int         regcomp_result;
     150                 :     cached_re_str re_temp;
     151                 :     char        errMsg[100];
     152                 :     MemoryContext oldcontext;
     153                 : 
     154                 :     /*
     155                 :      * Look for a match among previously compiled REs.  Since the data
     156                 :      * structure is self-organizing with most-used entries at the front, our
     157                 :      * search strategy can just be to scan from the front.
     158                 :      */
     159 GIC      728658 :     for (i = 0; i < num_res; i++)
     160                 :     {
     161          725642 :         if (re_array[i].cre_pat_len == text_re_len &&
     162          465084 :             re_array[i].cre_flags == cflags &&
     163          464464 :             re_array[i].cre_collation == collation &&
     164 CBC      464370 :             memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
     165                 :         {
     166 ECB             :             /*
     167                 :              * Found a match; move it to front if not there already.
     168                 :              */
     169 CBC      456721 :             if (i > 0)
     170                 :             {
     171 GIC      224146 :                 re_temp = re_array[i];
     172          224146 :                 memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str));
     173          224146 :                 re_array[0] = re_temp;
     174 ECB             :             }
     175                 : 
     176 CBC      456721 :             return &re_array[0].cre_re;
     177 ECB             :         }
     178                 :     }
     179                 : 
     180                 :     /* Set up the cache memory on first go through. */
     181 GNC        3016 :     if (unlikely(RegexpCacheMemoryContext == NULL))
     182             795 :         RegexpCacheMemoryContext =
     183             795 :             AllocSetContextCreate(TopMemoryContext,
     184                 :                                   "RegexpCacheMemoryContext",
     185                 :                                   ALLOCSET_SMALL_SIZES);
     186                 : 
     187                 :     /*
     188 ECB             :      * Couldn't find it, so try to compile the new RE.  To avoid leaking
     189                 :      * resources on failure, we build into the re_temp local.
     190                 :      */
     191                 : 
     192                 :     /* Convert pattern string to wide characters */
     193 CBC        3016 :     pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
     194            3016 :     pattern_len = pg_mb2wchar_with_len(text_re_val,
     195 ECB             :                                        pattern,
     196                 :                                        text_re_len);
     197                 : 
     198                 :     /*
     199                 :      * Make a memory context for this compiled regexp.  This is initially a
     200                 :      * child of the current memory context, so it will be cleaned up
     201                 :      * automatically if compilation is interrupted and throws an ERROR. We'll
     202                 :      * re-parent it under the longer lived cache context if we make it to the
     203                 :      * bottom of this function.
     204                 :      */
     205 GNC        3016 :     re_temp.cre_context = AllocSetContextCreate(CurrentMemoryContext,
     206                 :                                                 "RegexpMemoryContext",
     207                 :                                                 ALLOCSET_SMALL_SIZES);
     208            3016 :     oldcontext = MemoryContextSwitchTo(re_temp.cre_context);
     209                 : 
     210 GIC        3016 :     regcomp_result = pg_regcomp(&re_temp.cre_re,
     211                 :                                 pattern,
     212                 :                                 pattern_len,
     213                 :                                 cflags,
     214                 :                                 collation);
     215                 : 
     216            3004 :     pfree(pattern);
     217 ECB             : 
     218 CBC        3004 :     if (regcomp_result != REG_OKAY)
     219                 :     {
     220                 :         /* re didn't compile (no need for pg_regfree, if so) */
     221 GIC          18 :         pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
     222              18 :         ereport(ERROR,
     223 ECB             :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     224                 :                  errmsg("invalid regular expression: %s", errMsg)));
     225                 :     }
     226                 : 
     227                 :     /* Copy the pattern into the per-regexp memory context. */
     228 GNC        2986 :     re_temp.cre_pat = palloc(text_re_len + 1);
     229 GIC        2986 :     memcpy(re_temp.cre_pat, text_re_val, text_re_len);
     230                 : 
     231                 :     /*
     232                 :      * NUL-terminate it only for the benefit of the identifier used for the
     233                 :      * memory context, visible in the pg_backend_memory_contexts view.
     234                 :      */
     235 GNC        2986 :     re_temp.cre_pat[text_re_len] = 0;
     236            2986 :     MemoryContextSetIdentifier(re_temp.cre_context, re_temp.cre_pat);
     237                 : 
     238 GIC        2986 :     re_temp.cre_pat_len = text_re_len;
     239 CBC        2986 :     re_temp.cre_flags = cflags;
     240            2986 :     re_temp.cre_collation = collation;
     241                 : 
     242                 :     /*
     243                 :      * Okay, we have a valid new item in re_temp; insert it into the storage
     244                 :      * array.  Discard last entry if needed.
     245                 :      */
     246            2986 :     if (num_res >= MAX_CACHED_RES)
     247 ECB             :     {
     248 GIC         366 :         --num_res;
     249 CBC         366 :         Assert(num_res < MAX_CACHED_RES);
     250                 :         /* Delete the memory context holding the regexp and pattern. */
     251 GNC         366 :         MemoryContextDelete(re_array[num_res].cre_context);
     252                 :     }
     253                 : 
     254                 :     /* Re-parent the memory context to our long-lived cache context. */
     255            2986 :     MemoryContextSetParent(re_temp.cre_context, RegexpCacheMemoryContext);
     256                 : 
     257 GIC        2986 :     if (num_res > 0)
     258            2191 :         memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));
     259                 : 
     260 CBC        2986 :     re_array[0] = re_temp;
     261 GIC        2986 :     num_res++;
     262 ECB             : 
     263 GNC        2986 :     MemoryContextSwitchTo(oldcontext);
     264                 : 
     265 CBC        2986 :     return &re_array[0].cre_re;
     266                 : }
     267 ECB             : 
     268                 : /*
     269                 :  * RE_wchar_execute - execute a RE on pg_wchar data
     270                 :  *
     271                 :  * Returns true on match, false on no match
     272                 :  *
     273                 :  *  re --- the compiled pattern as returned by RE_compile_and_cache
     274                 :  *  data --- the data to match against (need not be null-terminated)
     275                 :  *  data_len --- the length of the data string
     276                 :  *  start_search -- the offset in the data to start searching
     277                 :  *  nmatch, pmatch  --- optional return area for match details
     278                 :  *
     279                 :  * Data is given as array of pg_wchar which is what Spencer's regex package
     280                 :  * wants.
     281                 :  */
     282                 : static bool
     283 GIC      893861 : RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
     284                 :                  int start_search, int nmatch, regmatch_t *pmatch)
     285                 : {
     286                 :     int         regexec_result;
     287                 :     char        errMsg[100];
     288                 : 
     289                 :     /* Perform RE match and return result */
     290          893861 :     regexec_result = pg_regexec(re,
     291                 :                                 data,
     292                 :                                 data_len,
     293                 :                                 start_search,
     294                 :                                 NULL,   /* no details */
     295                 :                                 nmatch,
     296                 :                                 pmatch,
     297                 :                                 0);
     298                 : 
     299 CBC      893861 :     if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
     300                 :     {
     301                 :         /* re failed??? */
     302 UIC           0 :         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
     303               0 :         ereport(ERROR,
     304                 :                 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
     305 ECB             :                  errmsg("regular expression failed: %s", errMsg)));
     306                 :     }
     307                 : 
     308 GIC      893861 :     return (regexec_result == REG_OKAY);
     309                 : }
     310                 : 
     311                 : /*
     312                 :  * RE_execute - execute a RE
     313                 :  *
     314 ECB             :  * Returns true on match, false on no match
     315                 :  *
     316                 :  *  re --- the compiled pattern as returned by RE_compile_and_cache
     317 EUB             :  *  dat --- the data to match against (need not be null-terminated)
     318                 :  *  dat_len --- the length of the data string
     319                 :  *  nmatch, pmatch  --- optional return area for match details
     320                 :  *
     321                 :  * Data is given in the database encoding.  We internally
     322                 :  * convert to array of pg_wchar which is what Spencer's regex package wants.
     323 ECB             :  */
     324                 : static bool
     325 GIC      346274 : RE_execute(regex_t *re, char *dat, int dat_len,
     326                 :            int nmatch, regmatch_t *pmatch)
     327                 : {
     328                 :     pg_wchar   *data;
     329                 :     int         data_len;
     330                 :     bool        match;
     331                 : 
     332                 :     /* Convert data string to wide characters */
     333          346274 :     data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
     334          346274 :     data_len = pg_mb2wchar_with_len(dat, data, dat_len);
     335                 : 
     336                 :     /* Perform RE match and return result */
     337          346274 :     match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
     338                 : 
     339          346274 :     pfree(data);
     340 CBC      346274 :     return match;
     341                 : }
     342                 : 
     343                 : /*
     344                 :  * RE_compile_and_execute - compile and execute a RE
     345                 :  *
     346                 :  * Returns true on match, false on no match
     347                 :  *
     348 ECB             :  *  text_re --- the pattern, expressed as a TEXT object
     349                 :  *  dat --- the data to match against (need not be null-terminated)
     350                 :  *  dat_len --- the length of the data string
     351                 :  *  cflags --- compile options for the pattern
     352                 :  *  collation --- collation to use for LC_CTYPE-dependent behavior
     353                 :  *  nmatch, pmatch  --- optional return area for match details
     354                 :  *
     355                 :  * Both pattern and data are given in the database encoding.  We internally
     356                 :  * convert to array of pg_wchar which is what Spencer's regex package wants.
     357                 :  */
     358                 : bool
     359 GIC      345212 : RE_compile_and_execute(text *text_re, char *dat, int dat_len,
     360                 :                        int cflags, Oid collation,
     361                 :                        int nmatch, regmatch_t *pmatch)
     362                 : {
     363                 :     regex_t    *re;
     364                 : 
     365                 :     /* Use REG_NOSUB if caller does not want sub-match details */
     366          345212 :     if (nmatch < 2)
     367          345212 :         cflags |= REG_NOSUB;
     368                 : 
     369                 :     /* Compile RE */
     370          345212 :     re = RE_compile_and_cache(text_re, cflags, collation);
     371                 : 
     372          345200 :     return RE_execute(re, dat, dat_len, nmatch, pmatch);
     373                 : }
     374 ECB             : 
     375                 : 
     376                 : /*
     377                 :  * parse_re_flags - parse the options argument of regexp_match and friends
     378                 :  *
     379                 :  *  flags --- output argument, filled with desired options
     380                 :  *  opts --- TEXT object, or NULL for defaults
     381                 :  *
     382                 :  * This accepts all the options allowed by any of the callers; callers that
     383                 :  * don't want some have to reject them after the fact.
     384                 :  */
     385                 : static void
     386 GIC      102964 : parse_re_flags(pg_re_flags *flags, text *opts)
     387 ECB             : {
     388                 :     /* regex flavor is always folded into the compile flags */
     389 GIC      102964 :     flags->cflags = REG_ADVANCED;
     390          102964 :     flags->glob = false;
     391                 : 
     392          102964 :     if (opts)
     393                 :     {
     394            1370 :         char       *opt_p = VARDATA_ANY(opts);
     395            1370 :         int         opt_len = VARSIZE_ANY_EXHDR(opts);
     396                 :         int         i;
     397                 : 
     398            3389 :         for (i = 0; i < opt_len; i++)
     399                 :         {
     400            2031 :             switch (opt_p[i])
     401 ECB             :             {
     402 GIC        1209 :                 case 'g':
     403            1209 :                     flags->glob = true;
     404 CBC        1209 :                     break;
     405 LBC           0 :                 case 'b':       /* BREs (but why???) */
     406 UIC           0 :                     flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE);
     407 LBC           0 :                     break;
     408 GIC           5 :                 case 'c':       /* case sensitive */
     409 CBC           5 :                     flags->cflags &= ~REG_ICASE;
     410               5 :                     break;
     411 UIC           0 :                 case 'e':       /* plain EREs */
     412               0 :                     flags->cflags |= REG_EXTENDED;
     413 LBC           0 :                     flags->cflags &= ~(REG_ADVANCED | REG_QUOTE);
     414 UIC           0 :                     break;
     415 CBC         146 :                 case 'i':       /* case insensitive */
     416 GIC         146 :                     flags->cflags |= REG_ICASE;
     417 CBC         146 :                     break;
     418             650 :                 case 'm':       /* Perloid synonym for n */
     419 ECB             :                 case 'n':       /* \n affects ^ $ . [^ */
     420 GBC         650 :                     flags->cflags |= REG_NEWLINE;
     421             650 :                     break;
     422 UBC           0 :                 case 'p':       /* ~Perl, \n affects . [^ */
     423 LBC           0 :                     flags->cflags |= REG_NLSTOP;
     424               0 :                     flags->cflags &= ~REG_NLANCH;
     425               0 :                     break;
     426 UBC           0 :                 case 'q':       /* literal string */
     427               0 :                     flags->cflags |= REG_QUOTE;
     428               0 :                     flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED);
     429               0 :                     break;
     430 CBC           6 :                 case 's':       /* single line, \n ordinary */
     431               6 :                     flags->cflags &= ~REG_NEWLINE;
     432               6 :                     break;
     433 LBC           0 :                 case 't':       /* tight syntax */
     434 UIC           0 :                     flags->cflags &= ~REG_EXPANDED;
     435 LBC           0 :                     break;
     436               0 :                 case 'w':       /* weird, \n affects ^ $ only */
     437 UBC           0 :                     flags->cflags &= ~REG_NLSTOP;
     438               0 :                     flags->cflags |= REG_NLANCH;
     439               0 :                     break;
     440 GBC           3 :                 case 'x':       /* expanded syntax */
     441               3 :                     flags->cflags |= REG_EXPANDED;
     442               3 :                     break;
     443              12 :                 default:
     444              12 :                     ereport(ERROR,
     445 ECB             :                             (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     446                 :                              errmsg("invalid regular expression option: \"%.*s\"",
     447                 :                                     pg_mblen(opt_p + i), opt_p + i)));
     448 EUB             :                     break;
     449                 :             }
     450                 :         }
     451                 :     }
     452 GBC      102952 : }
     453 EUB             : 
     454                 : 
     455 ECB             : /*
     456                 :  *  interface routines called by the function manager
     457                 :  */
     458                 : 
     459                 : Datum
     460 GIC      127719 : nameregexeq(PG_FUNCTION_ARGS)
     461                 : {
     462          127719 :     Name        n = PG_GETARG_NAME(0);
     463          127719 :     text       *p = PG_GETARG_TEXT_PP(1);
     464                 : 
     465          127719 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
     466                 :                                           NameStr(*n),
     467 ECB             :                                           strlen(NameStr(*n)),
     468                 :                                           REG_ADVANCED,
     469                 :                                           PG_GET_COLLATION(),
     470                 :                                           0, NULL));
     471                 : }
     472                 : 
     473                 : Datum
     474 GIC        6782 : nameregexne(PG_FUNCTION_ARGS)
     475 ECB             : {
     476 GIC        6782 :     Name        n = PG_GETARG_NAME(0);
     477 CBC        6782 :     text       *p = PG_GETARG_TEXT_PP(1);
     478 ECB             : 
     479 GIC        6782 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,
     480 ECB             :                                            NameStr(*n),
     481                 :                                            strlen(NameStr(*n)),
     482                 :                                            REG_ADVANCED,
     483                 :                                            PG_GET_COLLATION(),
     484                 :                                            0, NULL));
     485                 : }
     486                 : 
     487                 : Datum
     488 GIC      186505 : textregexeq(PG_FUNCTION_ARGS)   /* fmgr entry: regex match of a text value (arg 0) against a pattern (arg 1) */
     489 ECB             : {
     490 GIC      186505 :     text       *s = PG_GETARG_TEXT_PP(0);   /* value to test */
     491 CBC      186505 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     492 ECB             : 
     493 GIC      186505 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
     494 ECB             :                                           VARDATA_ANY(s),   /* data is passed as pointer + explicit length */
     495                 :                                           VARSIZE_ANY_EXHDR(s),
     496                 :                                           REG_ADVANCED,     /* no REG_ICASE here, so case is significant */
     497                 :                                           PG_GET_COLLATION(),
     498                 :                                           0, NULL));    /* presumably: no match positions requested */
     499                 : }
     500                 : 
     501                 : Datum
     502 GIC       17058 : textregexne(PG_FUNCTION_ARGS)   /* fmgr entry: logical negation of the text-vs-pattern regex test */
     503 ECB             : {
     504 GIC       17058 :     text       *s = PG_GETARG_TEXT_PP(0);   /* value to test */
     505 CBC       17058 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     506 ECB             : 
     507 GIC       17058 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,   /* note the '!': result is inverted */
     508 ECB             :                                            VARDATA_ANY(s),  /* data is passed as pointer + explicit length */
     509                 :                                            VARSIZE_ANY_EXHDR(s),
     510                 :                                            REG_ADVANCED,    /* no REG_ICASE here, so case is significant */
     511                 :                                            PG_GET_COLLATION(),
     512                 :                                            0, NULL));   /* presumably: no match positions requested */
     513                 : }
     514                 : 
     515                 : 
     516                 : /*
     517                 :  *  routines that use the regexp stuff, but ignore the case.
     518                 :  *  for this, we use the REG_ICASE flag to pg_regcomp
     519                 :  */
     520                 : 
     521                 : 
     522                 : Datum
     523 GIC        6853 : nameicregexeq(PG_FUNCTION_ARGS) /* fmgr entry: case-insensitive regex match of a name value (arg 0) against a pattern (arg 1) */
     524                 : {
     525            6853 :     Name        n = PG_GETARG_NAME(0);  /* value to test */
     526            6853 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     527                 : 
     528            6853 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
     529                 :                                           NameStr(*n),
     530                 :                                           strlen(NameStr(*n)),  /* the name is measured as a C string */
     531                 :                                           REG_ADVANCED | REG_ICASE, /* REG_ICASE makes the match ignore case */
     532                 :                                           PG_GET_COLLATION(),
     533                 :                                           0, NULL));    /* presumably: no match positions requested */
     534                 : }
     535                 : 
     536                 : Datum
     537               3 : nameicregexne(PG_FUNCTION_ARGS) /* fmgr entry: logical negation of the case-insensitive name-vs-pattern regex test */
     538 ECB             : {
     539 GIC           3 :     Name        n = PG_GETARG_NAME(0);  /* value to test */
     540 CBC           3 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     541 ECB             : 
     542 GIC           3 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,   /* note the '!': result is inverted */
     543 ECB             :                                            NameStr(*n),
     544                 :                                            strlen(NameStr(*n)), /* the name is measured as a C string */
     545                 :                                            REG_ADVANCED | REG_ICASE,    /* REG_ICASE makes the match ignore case */
     546                 :                                            PG_GET_COLLATION(),
     547                 :                                            0, NULL));   /* presumably: no match positions requested */
     548                 : }
     549                 : 
     550                 : Datum
     551 GIC         134 : texticregexeq(PG_FUNCTION_ARGS) /* fmgr entry: case-insensitive regex match of a text value (arg 0) against a pattern (arg 1) */
     552 ECB             : {
     553 GIC         134 :     text       *s = PG_GETARG_TEXT_PP(0);   /* value to test */
     554 CBC         134 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     555 ECB             : 
     556 GIC         134 :     PG_RETURN_BOOL(RE_compile_and_execute(p,
     557 ECB             :                                           VARDATA_ANY(s),   /* data is passed as pointer + explicit length */
     558                 :                                           VARSIZE_ANY_EXHDR(s),
     559                 :                                           REG_ADVANCED | REG_ICASE, /* REG_ICASE makes the match ignore case */
     560                 :                                           PG_GET_COLLATION(),
     561                 :                                           0, NULL));    /* presumably: no match positions requested */
     562                 : }
     563                 : 
     564                 : Datum
     565 GIC           8 : texticregexne(PG_FUNCTION_ARGS) /* fmgr entry: logical negation of the case-insensitive text-vs-pattern regex test */
     566 ECB             : {
     567 GIC           8 :     text       *s = PG_GETARG_TEXT_PP(0);   /* value to test */
     568 CBC           8 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     569 ECB             : 
     570 GIC           8 :     PG_RETURN_BOOL(!RE_compile_and_execute(p,   /* note the '!': result is inverted */
     571 ECB             :                                            VARDATA_ANY(s),  /* data is passed as pointer + explicit length */
     572                 :                                            VARSIZE_ANY_EXHDR(s),
     573                 :                                            REG_ADVANCED | REG_ICASE,    /* REG_ICASE makes the match ignore case */
     574                 :                                            PG_GET_COLLATION(),
     575                 :                                            0, NULL));   /* presumably: no match positions requested */
     576                 : }
     577                 : 
     578                 : 
     579                 : /*
     580                 :  * textregexsubstr()
     581                 :  *      Return a substring matched by a regular expression.
                         :  *
                         :  *      Arg 0 is the string to search and arg 1 the pattern, which is
                         :  *      compiled with REG_ADVANCED under the call's collation.  The result
                         :  *      is the text matched by the first parenthesized subexpression if
                         :  *      the pattern has one, else the text matched by the whole pattern.
                         :  *      Returns SQL NULL if there is no match, or if that subexpression
                         :  *      did not take part in the match.
     582                 :  */
     583                 : Datum
     584 GIC        1074 : textregexsubstr(PG_FUNCTION_ARGS)
     585 ECB             : {
     586 GIC        1074 :     text       *s = PG_GETARG_TEXT_PP(0);   /* string to search */
     587            1074 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     588                 :     regex_t    *re;
     589                 :     regmatch_t  pmatch[2];  /* [0] = overall match, [1] = first subexpression */
     590                 :     int         so,
     591                 :                 eo;         /* start and end offsets of the span to return */
     592                 : 
     593                 :     /* Compile RE */
     594            1074 :     re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
     595                 : 
     596                 :     /*
     597                 :      * We pass two regmatch_t structs to get info about the overall match and
     598                 :      * the match for the first parenthesized subexpression (if any). If there
     599 ECB             :      * is a parenthesized subexpression, we return what it matched; else
     600                 :      * return what the whole regexp matched.
     601                 :      */
     602 CBC        2148 :     if (!RE_execute(re,
     603 GIC        2148 :                     VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
     604                 :                     2, pmatch))
     605               3 :         PG_RETURN_NULL();       /* definitely no match */
     606                 : 
     607            1071 :     if (re->re_nsub > 0)
     608                 :     {
     609 ECB             :         /* has parenthesized subexpressions, use the first one */
     610 GIC         761 :         so = pmatch[1].rm_so;
     611             761 :         eo = pmatch[1].rm_eo;
     612                 :     }
     613                 :     else
     614                 :     {
     615                 :         /* no parenthesized subexpression, use whole match */
     616             310 :         so = pmatch[0].rm_so;
     617 CBC         310 :         eo = pmatch[0].rm_eo;
     618 ECB             :     }
     619                 : 
     620                 :     /*
     621                 :      * It is possible to have a match to the whole pattern but no match for a
     622                 :      * subexpression; for example 'foo(bar)?' is considered to match 'foo' but
     623                 :      * there is no subexpression match.  So this extra test for match failure
     624                 :      * is not redundant.
     625                 :      */
     626 CBC        1071 :     if (so < 0 || eo < 0)
     627 GIC           3 :         PG_RETURN_NULL();
     628                 : 
     629            1068 :     return DirectFunctionCall3(text_substr,
     630                 :                                PointerGetDatum(s),
     631 ECB             :                                Int32GetDatum(so + 1),   /* presumably converts the 0-based offset to a 1-based start for text_substr */
     632                 :                                Int32GetDatum(eo - so));    /* length of the span */
     633                 : }
     634                 : 
     635                 : /*
     636                 :  * textregexreplace_noopt()
     637                 :  *      Return a string matched by a regular expression, with replacement.
     638                 :  *
     639                 :  * This version doesn't have an option argument: we default to case
     640                 :  * sensitive match, replace the first instance only.
                         :  *
                         :  * Arg 0 is the source string, arg 1 the pattern and arg 2 the replacement;
                         :  * all the work is delegated to replace_text_regexp().
     641                 :  */
     642                 : Datum
     643 GIC        4069 : textregexreplace_noopt(PG_FUNCTION_ARGS)
     644 ECB             : {
     645 GIC        4069 :     text       *s = PG_GETARG_TEXT_PP(0);   /* source string */
     646            4069 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     647            4069 :     text       *r = PG_GETARG_TEXT_PP(2);   /* replacement text */
     648                 : 
     649            4069 :     PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
     650                 :                                          REG_ADVANCED, PG_GET_COLLATION(),
     651                 :                                          0, 1));    /* search from offset 0; replace only the first match */
     652                 : }
     653                 : 
     654                 : /*
     655                 :  * textregexreplace()
     656                 :  *      Return a string matched by a regular expression, with replacement.
                         :  *
                         :  * Arg 0 is the source string, arg 1 the pattern, arg 2 the replacement and
                         :  * arg 3 a string of option letters handed to parse_re_flags().  Raises
                         :  * ERRCODE_INVALID_PARAMETER_VALUE, with a hint, if the option string
                         :  * starts with a digit.
     657                 :  */
     658 ECB             : Datum
     659 GIC        1173 : textregexreplace(PG_FUNCTION_ARGS)
     660 ECB             : {
     661 CBC        1173 :     text       *s = PG_GETARG_TEXT_PP(0);   /* source string */
     662            1173 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     663 GIC        1173 :     text       *r = PG_GETARG_TEXT_PP(2);   /* replacement text */
     664 CBC        1173 :     text       *opt = PG_GETARG_TEXT_PP(3); /* option letters */
     665                 :     pg_re_flags flags;
     666                 : 
     667                 :     /*
     668                 :      * regexp_replace() with four arguments will be preferentially resolved as
     669                 :      * this form when the fourth argument is of type UNKNOWN.  However, the
     670                 :      * user might have intended to call textregexreplace_extended_no_n.  If we
     671                 :      * see flags that look like an integer, emit the same error that
     672                 :      * parse_re_flags would, but add a HINT about how to fix it.
     673                 :      */
     674            1173 :     if (VARSIZE_ANY_EXHDR(opt) > 0)
     675                 :     {
     676            1173 :         char       *opt_p = VARDATA_ANY(opt);
     677 ECB             : 
     678 CBC        1173 :         if (*opt_p >= '0' && *opt_p <= '9')     /* only the first byte is examined */
     679               3 :             ereport(ERROR,
     680                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     681                 :                      errmsg("invalid regular expression option: \"%.*s\"",
     682                 :                             pg_mblen(opt_p), opt_p),    /* "%.*s" limits output to pg_mblen() bytes, presumably the first character */
     683                 :                      errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
     684                 :     }
     685                 : 
     686 GIC        1170 :     parse_re_flags(&flags, opt);
     687                 : 
     688            1167 :     PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
     689 ECB             :                                          flags.cflags, PG_GET_COLLATION(),
     690                 :                                          0, flags.glob ? 0 : 1));  /* search from offset 0; n = 0 (all matches) if glob is set, else first match only */
     691                 : }
     692                 : 
     693                 : /*
     694                 :  * textregexreplace_extended()
     695                 :  *      Return a string matched by a regular expression, with replacement.
     696                 :  *      Extends textregexreplace by allowing a start position and the
     697                 :  *      choice of the occurrence to replace (0 means all occurrences).
                         :  *
                         :  *      Args 0-2 are the source string, pattern and replacement.  Arg 3
                         :  *      (start, default 1) must be > 0 and arg 4 (n, default 1) must be
                         :  *      >= 0, else ERRCODE_INVALID_PARAMETER_VALUE is raised.  Arg 5, if
                         :  *      present, is the option string handed to parse_re_flags().  When
                         :  *      arg 4 is not supplied, n is taken from the 'g' flag instead.
     698                 :  */
     699                 : Datum
     700 GIC          33 : textregexreplace_extended(PG_FUNCTION_ARGS)
     701 ECB             : {
     702 GIC          33 :     text       *s = PG_GETARG_TEXT_PP(0);   /* source string */
     703 CBC          33 :     text       *p = PG_GETARG_TEXT_PP(1);   /* regex pattern */
     704 GIC          33 :     text       *r = PG_GETARG_TEXT_PP(2);   /* replacement text */
     705              33 :     int         start = 1;  /* default when arg 3 is not supplied */
     706              33 :     int         n = 1;      /* default when arg 4 is not supplied (may be overridden below) */
     707              33 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5); /* option string, fetched only if the call has it */
     708                 :     pg_re_flags re_flags;
     709                 : 
     710                 :     /* Collect optional parameters */
     711              33 :     if (PG_NARGS() > 3)
     712                 :     {
     713              33 :         start = PG_GETARG_INT32(3);
     714              33 :         if (start <= 0)
     715 CBC           3 :             ereport(ERROR,
     716                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     717 ECB             :                      errmsg("invalid value for parameter \"%s\": %d",
     718                 :                             "start", start)));
     719                 :     }
     720 CBC          30 :     if (PG_NARGS() > 4)
     721 ECB             :     {
     722 CBC          27 :         n = PG_GETARG_INT32(4);
     723 GIC          27 :         if (n < 0)
     724               3 :             ereport(ERROR,
     725                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
     726 ECB             :                      errmsg("invalid value for parameter \"%s\": %d",
     727                 :                             "n", n)));
     728                 :     }
     729                 : 
     730                 :     /* Determine options */
     731 GIC          27 :     parse_re_flags(&re_flags, flags);
     732                 : 
     733                 :     /* If N was not specified, deduce it from the 'g' flag */
     734              27 :     if (PG_NARGS() <= 4)
     735 CBC           3 :         n = re_flags.glob ? 0 : 1;
     736                 : 
     737 ECB             :     /* Do the replacement(s) */
     738 CBC          27 :     PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
     739 ECB             :                                          re_flags.cflags, PG_GET_COLLATION(),
     740                 :                                          start - 1, n));   /* start is validated as >= 1 above, so start - 1 is never negative */
     741                 : }
     742                 : 
     743                 : /* This is separate to keep the opr_sanity regression test from complaining */
     744                 : Datum
     745 GIC           3 : textregexreplace_extended_no_n(PG_FUNCTION_ARGS)
     746 ECB             : {
     747 GIC           3 :     return textregexreplace_extended(fcinfo);   /* forwards the call info unchanged; the callee checks PG_NARGS() itself */
     748                 : }
     749 ECB             : 
     750                 : /* This is separate to keep the opr_sanity regression test from complaining */
     751                 : Datum
     752 GIC           3 : textregexreplace_extended_no_flags(PG_FUNCTION_ARGS)
     753 ECB             : {
     754 GIC           3 :     return textregexreplace_extended(fcinfo);   /* forwards the call info unchanged; the callee checks PG_NARGS() itself */
     755                 : }
     756                 : 
     757                 : /*
     758                 :  * similar_to_escape(), similar_escape()
     759                 :  *
     760 ECB             :  * Convert a SQL "SIMILAR TO" regexp pattern to POSIX style, so it can be
     761                 :  * used by our regexp engine.
     762                 :  *
     763                 :  * similar_escape_internal() is the common workhorse for three SQL-exposed
     764                 :  * functions.  esc_text can be passed as NULL to select the default escape
     765                 :  * (which is '\'), or as an empty string to select no escape character.
                         :  *
                         :  * Returns a newly palloc'd text value holding the converted pattern.
                         :  * Raises ERRCODE_INVALID_ESCAPE_SEQUENCE if esc_text holds more than one
                         :  * character, and ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER if the pattern
                         :  * contains more than two escape-double-quote separators.
     766                 :  */
     767                 : static text *
     768 GIC          66 : similar_escape_internal(text *pat_text, text *esc_text)
     769 ECB             : {
     770                 :     text       *result;
     771                 :     char       *p,
     772                 :                *e,
     773                 :                *r;
     774                 :     int         plen,
     775                 :                 elen;
     776 GIC          66 :     bool        afterescape = false;    /* previous character was the escape character */
     777              66 :     bool        incharclass = false;    /* currently between '[' and ']' */
     778              66 :     int         nquotes = 0;            /* escape-double-quote separators seen so far */
     779                 : 
     780              66 :     p = VARDATA_ANY(pat_text);
     781              66 :     plen = VARSIZE_ANY_EXHDR(pat_text);
     782              66 :     if (esc_text == NULL)
     783 ECB             :     {
     784                 :         /* No ESCAPE clause provided; default to backslash as escape */
     785 GIC          20 :         e = "\\";
     786              20 :         elen = 1;
     787                 :     }
     788                 :     else
     789                 :     {
     790              46 :         e = VARDATA_ANY(esc_text);
     791 CBC          46 :         elen = VARSIZE_ANY_EXHDR(esc_text);
     792              46 :         if (elen == 0)
     793               3 :             e = NULL;           /* no escape character */
     794 GIC          43 :         else if (elen > 1)
     795 ECB             :         {
     796 CBC           3 :             int         escape_mblen = pg_mbstrlen_with_len(e, elen);   /* length in characters, as opposed to elen in bytes */
     797 ECB             : 
     798 GIC           3 :             if (escape_mblen > 1)
     799               3 :                 ereport(ERROR,
     800 ECB             :                         (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
     801                 :                          errmsg("invalid escape string"),
     802                 :                          errhint("Escape string must be empty or one character.")));
     803                 :         }
     804                 :     }
     805                 : 
     806                 :     /*----------
     807                 :      * We surround the transformed input string with
     808                 :      *          ^(?: ... )$
     809                 :      * which requires some explanation.  We need "^" and "$" to force
     810                 :      * the pattern to match the entire input string as per the SQL spec.
     811                 :      * The "(?:" and ")" are a non-capturing set of parens; we have to have
     812                 :      * parens in case the string contains "|", else the "^" and "$" will
     813                 :      * be bound into the first and last alternatives which is not what we
     814                 :      * want, and the parens must be non-capturing because we don't want them
     815                 :      * to count when selecting output for SUBSTRING.
     816                 :      *
     817                 :      * When the pattern is divided into three parts by escape-double-quotes,
     818                 :      * what we emit is
     819                 :      *          ^(?:part1){1,1}?(part2){1,1}(?:part3)$
     820                 :      * which requires even more explanation.  The "{1,1}?" on part1 makes it
     821                 :      * non-greedy so that it will match the smallest possible amount of text
     822                 :      * not the largest, as required by SQL.  The plain parens around part2
     823                 :      * are capturing parens so that that part is what controls the result of
     824                 :      * SUBSTRING.  The "{1,1}" forces part2 to be greedy, so that it matches
     825                 :      * the largest possible amount of text; hence part3 must match the
     826                 :      * smallest amount of text, as required by SQL.  We don't need an explicit
     827                 :      * greediness marker on part3.  Note that this also confines the effects
     828                 :      * of any "|" characters to the respective part, which is what we want.
     829                 :      *
     830                 :      * The SQL spec says that SUBSTRING's pattern must contain exactly two
     831                 :      * escape-double-quotes, but we only complain if there's more than two.
     832                 :      * With none, we act as though part1 and part3 are empty; with one, we
     833                 :      * act as though part3 is empty.  Both behaviors fall out of omitting
     834                 :      * the relevant part separators in the above expansion.  If the result
     835                 :      * of this function is used in a plain regexp match (SIMILAR TO), the
     836                 :      * escape-double-quotes have no effect on the match behavior.
     837                 :      *----------
     838                 :      */
     839                 : 
     840                 :     /*
     841                 :      * We need room for the prefix/postfix and part separators, plus as many
     842                 :      * as 3 output bytes per input byte; since the input is at most 1GB this
     843                 :      * can't overflow size_t.
                         :      *
                         :      * The constant 23 is 4 bytes for "^(?:", 8 for the first part separator
                         :      * "){1,1}?(", 9 for the second part separator "){1,1}(?:", and 2 for
                         :      * ")$".  The 3-bytes-per-input-byte worst case is '(' becoming "(?:".
     844                 :      */
     845 GIC          63 :     result = (text *) palloc(VARHDRSZ + 23 + 3 * (size_t) plen);
     846              63 :     r = VARDATA(result);
     847                 : 
     848              63 :     *r++ = '^';
     849              63 :     *r++ = '(';
     850              63 :     *r++ = '?';
     851              63 :     *r++ = ':';
     852                 : 
     853             479 :     while (plen > 0)
     854                 :     {
     855             419 :         char        pchar = *p;
     856                 : 
     857                 :         /*
     858                 :          * If both the escape character and the current character from the
     859                 :          * pattern are multi-byte, we need to take the slow path.
     860 ECB             :          *
     861                 :          * But if one of them is single-byte, we can process the pattern one
     862                 :          * byte at a time, ignoring multi-byte characters.  (This works
     863                 :          * because all server-encodings have the property that a valid
     864                 :          * multi-byte character representation cannot contain the
     865                 :          * representation of a valid single-byte character.)
     866                 :          */
     867                 : 
     868 CBC         419 :         if (elen > 1)
     869                 :         {
     870 LBC           0 :             int         mblen = pg_mblen(p);
     871                 : 
     872 UIC           0 :             if (mblen > 1)
     873                 :             {
     874                 :                 /* slow, multi-byte path */
     875               0 :                 if (afterescape)
     876                 :                 {
     877               0 :                     *r++ = '\\';
     878               0 :                     memcpy(r, p, mblen);
     879               0 :                     r += mblen;
     880               0 :                     afterescape = false;
     881                 :                 }
     882               0 :                 else if (e && elen == mblen && memcmp(e, p, mblen) == 0)
     883 ECB             :                 {
     884                 :                     /* SQL escape character; do not send to output */
     885 UBC           0 :                     afterescape = true;
     886                 :                 }
     887 EUB             :                 else
     888                 :                 {
     889                 :                     /*
     890                 :                      * We know it's a multi-byte character, so we don't need
     891                 :                      * to do all the comparisons to single-byte characters
     892                 :                      * that we do below.
     893                 :                      */
     894 UBC           0 :                     memcpy(r, p, mblen);
     895               0 :                     r += mblen;
     896                 :                 }
     897 EUB             : 
     898 UIC           0 :                 p += mblen;
     899               0 :                 plen -= mblen;
     900 EUB             : 
     901 UIC           0 :                 continue;
     902                 :             }
     903                 :         }
     904                 : 
     905                 :         /* fast path */
     906 GIC         419 :         if (afterescape)
     907                 :         {
     908              71 :             if (pchar == '"' && !incharclass)  /* escape-double-quote? */
     909 EUB             :             {
     910                 :                 /* emit appropriate part separator, per notes above */
     911 GIC          62 :                 if (nquotes == 0)
     912                 :                 {
     913 GBC          31 :                     *r++ = ')';
     914              31 :                     *r++ = '{';
     915 GIC          31 :                     *r++ = '1';
     916 GBC          31 :                     *r++ = ',';
     917 GIC          31 :                     *r++ = '1';
     918              31 :                     *r++ = '}';
     919              31 :                     *r++ = '?';
     920              31 :                     *r++ = '(';
     921 ECB             :                 }
     922 GIC          31 :                 else if (nquotes == 1)
     923 ECB             :                 {
     924 GIC          28 :                     *r++ = ')';
     925              28 :                     *r++ = '{';
     926 CBC          28 :                     *r++ = '1';
     927 GIC          28 :                     *r++ = ',';
     928 CBC          28 :                     *r++ = '1';
     929              28 :                     *r++ = '}';
     930              28 :                     *r++ = '(';
     931              28 :                     *r++ = '?';
     932              28 :                     *r++ = ':';
     933 ECB             :                 }
     934                 :                 else
     935 CBC           3 :                     ereport(ERROR,
     936                 :                             (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER),
     937 ECB             :                              errmsg("SQL regular expression may not contain more than two escape-double-quote separators")));
     938 GIC          59 :                 nquotes++;
     939 ECB             :             }
     940                 :             else
     941                 :             {
     942                 :                 /*
     943                 :                  * We allow any character at all to be escaped; notably, this
     944                 :                  * allows access to POSIX character-class escapes such as
     945                 :                  * "\d".  The SQL spec is considerably more restrictive.
     946                 :                  */
     947 CBC           9 :                 *r++ = '\\';
     948 GIC           9 :                 *r++ = pchar;
     949                 :             }
     950 CBC          68 :             afterescape = false;
     951                 :         }
     952 GIC         348 :         else if (e && pchar == *e)
     953 ECB             :         {
     954                 :             /* SQL escape character; do not send to output */
     955 GIC          71 :             afterescape = true;
     956                 :         }
     957             277 :         else if (incharclass)
     958                 :         {
     959 UIC           0 :             if (pchar == '\\')  /* a backslash inside [...] is emitted doubled */
     960               0 :                 *r++ = '\\';
     961               0 :             *r++ = pchar;
     962 LBC           0 :             if (pchar == ']')
     963               0 :                 incharclass = false;
     964                 :         }
     965 CBC         277 :         else if (pchar == '[')
     966                 :         {
     967 LBC           0 :             *r++ = pchar;
     968 UIC           0 :             incharclass = true;
     969                 :         }
     970 CBC         277 :         else if (pchar == '%')  /* '%' is translated to ".*" */
     971                 :         {
     972              54 :             *r++ = '.';
     973 GIC          54 :             *r++ = '*';
     974 EUB             :         }
     975 GBC         223 :         else if (pchar == '_')  /* '_' is translated to "." */
     976              26 :             *r++ = '.';
     977             197 :         else if (pchar == '(')
     978 EUB             :         {
     979                 :             /* convert to non-capturing parenthesis */
     980 CBC           9 :             *r++ = '(';
     981 GIC           9 :             *r++ = '?';
     982 GBC           9 :             *r++ = ':';
     983 EUB             :         }
     984 GIC         188 :         else if (pchar == '\\' || pchar == '.' ||
     985 CBC         183 :                  pchar == '^' || pchar == '$')
     986                 :         {
     987               5 :             *r++ = '\\';    /* these four characters are emitted with a backslash in front */
     988               5 :             *r++ = pchar;
     989                 :         }
     990 ECB             :         else
     991 CBC         183 :             *r++ = pchar;
     992             416 :         p++, plen--;
     993                 :     }
     994                 : 
     995              60 :     *r++ = ')';
     996              60 :     *r++ = '$';
     997 ECB             : 
     998 GIC          60 :     SET_VARSIZE(result, r - ((char *) result));    /* r points just past the last byte written, so this size includes the header */
     999 ECB             : 
    1000 CBC          60 :     return result;
    1001                 : }
    1002 ECB             : 
    1003                 : /*
    1004                 :  * similar_to_escape(pattern, escape)
                         :  *
                         :  * Passes both arguments straight to similar_escape_internal() and returns
                         :  * the converted pattern.
    1005                 :  */
    1006                 : Datum
    1007 CBC          46 : similar_to_escape_2(PG_FUNCTION_ARGS)
    1008                 : {
    1009 GIC          46 :     text       *pat_text = PG_GETARG_TEXT_PP(0);    /* SIMILAR TO pattern */
    1010 CBC          46 :     text       *esc_text = PG_GETARG_TEXT_PP(1);    /* escape string */
    1011 ECB             :     text       *result;
    1012                 : 
    1013 CBC          46 :     result = similar_escape_internal(pat_text, esc_text);
    1014                 : 
    1015              40 :     PG_RETURN_TEXT_P(result);
    1016                 : }
    1017                 : 
    1018                 : /*
    1019                 :  * similar_to_escape(pattern)
    1020                 :  * Inserts a default escape character.
                         :  *
                         :  * This is done by passing NULL as the escape argument of
                         :  * similar_escape_internal().
    1021                 :  */
    1022 ECB             : Datum
    1023 GIC          20 : similar_to_escape_1(PG_FUNCTION_ARGS)
    1024 ECB             : {
    1025 CBC          20 :     text       *pat_text = PG_GETARG_TEXT_PP(0);    /* SIMILAR TO pattern */
    1026                 :     text       *result;
    1027                 : 
    1028              20 :     result = similar_escape_internal(pat_text, NULL);  /* NULL selects the default escape */
    1029                 : 
    1030              20 :     PG_RETURN_TEXT_P(result);
    1031                 : }
    1032                 : 
    1033                 : /*
    1034                 :  * similar_escape(pattern, escape)
    1035                 :  *
    1036                 :  * Legacy function for compatibility with views stored using the
    1037                 :  * pre-v13 expansion of SIMILAR TO.  Unlike the above functions, this
    1038 ECB             :  * is non-strict, which leads to not-per-spec handling of "ESCAPE NULL".
                         :  *
                         :  * A NULL pattern (argument 0) yields a NULL result.  A NULL escape
                         :  * (argument 1) is not an error: it is passed to similar_escape_internal()
                         :  * as a NULL pointer, so the default escape character is used.  In every
                         :  * other case the text built by similar_escape_internal() is returned.
    1039                 :  */
    1040                 : Datum
    1041 UIC           0 : similar_escape(PG_FUNCTION_ARGS)
    1042                 : {
    1043 ECB             :     text       *pat_text;
    1044                 :     text       *esc_text;
    1045                 :     text       *result;
    1046                 : 
    1047                 :     /* This function is not strict, so must test explicitly */
    1048 UIC           0 :     if (PG_ARGISNULL(0))
    1049               0 :         PG_RETURN_NULL();
    1050               0 :     pat_text = PG_GETARG_TEXT_PP(0);
    1051                 : 
    1052               0 :     if (PG_ARGISNULL(1))
    1053               0 :         esc_text = NULL;        /* use default escape character */
    1054                 :     else
    1055               0 :         esc_text = PG_GETARG_TEXT_PP(1);
    1056 EUB             : 
    1057 UIC           0 :     result = similar_escape_internal(pat_text, esc_text);
    1058                 : 
    1059               0 :     PG_RETURN_TEXT_P(result);
    1060                 : }
    1061                 : 
    1062                 : /*
    1063 EUB             :  * regexp_count()
    1064                 :  *      Return the number of matches of a pattern within a string.
                         :  *
                         :  * Arguments: 0 = string, 1 = pattern, 2 = start (optional, default 1),
                         :  * 3 = flags (optional).  "start" is read only when more than two
                         :  * arguments were passed and must be greater than zero, else an
                         :  * invalid-parameter-value error is raised.  The 'g' flag is rejected
                         :  * with the same error code, although all matches are counted regardless.
                         :  *
                         :  * Returns the nmatches value computed by setup_regexp_matches() as an
                         :  * int32.
    1065                 :  */
    1066                 : Datum
    1067 GBC          24 : regexp_count(PG_FUNCTION_ARGS)
    1068 EUB             : {
    1069 GIC          24 :     text       *str = PG_GETARG_TEXT_PP(0);
    1070 GBC          24 :     text       *pattern = PG_GETARG_TEXT_PP(1);
    1071 GIC          24 :     int         start = 1;
    1072 GBC          24 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3);
    1073                 :     pg_re_flags re_flags;
    1074 EUB             :     regexp_matches_ctx *matchctx;
    1075                 : 
    1076                 :     /* Collect optional parameters */
    1077 GIC          24 :     if (PG_NARGS() > 2)
    1078                 :     {
    1079              21 :         start = PG_GETARG_INT32(2);
    1080              21 :         if (start <= 0)
    1081               6 :             ereport(ERROR,
    1082 ECB             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1083                 :                      errmsg("invalid value for parameter \"%s\": %d",
    1084                 :                             "start", start)));
    1085                 :     }
    1086                 : 
    1087                 :     /* Determine options */
    1088 GIC          18 :     parse_re_flags(&re_flags, flags);
    1089                 :     /* User mustn't specify 'g' */
    1090              18 :     if (re_flags.glob)
    1091 UIC           0 :         ereport(ERROR,
    1092 ECB             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1093                 :         /* translator: %s is a SQL function name */
    1094                 :                  errmsg("%s does not support the \"global\" option",
    1095                 :                         "regexp_count()")));
    1096                 :     /* But we find all the matches anyway */
    1097 GIC          18 :     re_flags.glob = true;
    1098                 : 
    1099                 :     /* Do the matching; start is 1-based, the search offset passed on is 0-based */
    1100              18 :     matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
    1101                 :                                     PG_GET_COLLATION(),
    1102                 :                                     false,  /* can ignore subexprs */
    1103 ECB             :                                     false, false);
    1104                 : 
    1105 CBC          18 :     PG_RETURN_INT32(matchctx->nmatches);
    1106 EUB             : }
    1107                 : 
    1108                 : /* This is separate to keep the opr_sanity regression test from complaining */
    1109                 : Datum
    1110 GIC           3 : regexp_count_no_start(PG_FUNCTION_ARGS)
    1111                 : {
    1112 CBC           3 :     return regexp_count(fcinfo);    /* fcinfo handed on untouched; regexp_count() tests PG_NARGS() itself */
    1113                 : }
    1114                 : 
    1115 ECB             : /* This is separate to keep the opr_sanity regression test from complaining */
    1116                 : Datum
    1117 GIC          15 : regexp_count_no_flags(PG_FUNCTION_ARGS)
    1118                 : {
    1119              15 :     return regexp_count(fcinfo);    /* fcinfo handed on untouched; flags presumably absent here -- TODO confirm against the catalog entry */
    1120 ECB             : }
    1121                 : 
    1122                 : /*
    1123                 :  * regexp_instr()
    1124                 :  *      Return the match's position within the string
                         :  *
                         :  * Arguments: 0 = string, 1 = pattern, 2 = start (default 1), 3 = n
                         :  * (default 1), 4 = endoption (default 0), 5 = flags, 6 = subexpr
                         :  * (default 0).  Each optional integer is read only when enough arguments
                         :  * were passed, and is validated: start > 0, n > 0, endoption is 0 or 1,
                         :  * subexpr >= 0.  A violation, or use of the 'g' flag, raises an
                         :  * invalid-parameter-value error.
                         :  *
                         :  * All matches are located by setup_regexp_matches(), which stores one
                         :  * (start, end) pair of offsets per pattern per match in match_locs.
                         :  * The result is the selected offset plus one, or 0 when there are fewer
                         :  * than n matches, when subexpr exceeds npatterns, or when the selected
                         :  * offset is negative.
                         :  *
                         :  * NOTE(review): setup_regexp_matches() sets npatterns = 1 for a pattern
                         :  * without parenthesized subexpressions, so subexpr = 1 is not caught by
                         :  * the "exceeds number of subexpressions" test in that case and the
                         :  * whole-match position is returned -- confirm this is intended.
    1125                 :  */
    1126                 : Datum
    1127 CBC          78 : regexp_instr(PG_FUNCTION_ARGS)
    1128                 : {
    1129 GIC          78 :     text       *str = PG_GETARG_TEXT_PP(0);
    1130              78 :     text       *pattern = PG_GETARG_TEXT_PP(1);
    1131              78 :     int         start = 1;
    1132 CBC          78 :     int         n = 1;
    1133 GIC          78 :     int         endoption = 0;
    1134 CBC          78 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
    1135 GIC          78 :     int         subexpr = 0;
    1136                 :     int         pos;
    1137                 :     pg_re_flags re_flags;
    1138                 :     regexp_matches_ctx *matchctx;
    1139                 : 
    1140                 :     /* Collect optional parameters */
    1141              78 :     if (PG_NARGS() > 2)
    1142 ECB             :     {
    1143 GIC          69 :         start = PG_GETARG_INT32(2);
    1144 CBC          69 :         if (start <= 0)
    1145               3 :             ereport(ERROR,
    1146 ECB             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1147                 :                      errmsg("invalid value for parameter \"%s\": %d",
    1148                 :                             "start", start)));
    1149                 :     }
    1150 CBC          75 :     if (PG_NARGS() > 3)
    1151                 :     {
    1152 GIC          63 :         n = PG_GETARG_INT32(3);
    1153              63 :         if (n <= 0)
    1154               3 :             ereport(ERROR,
    1155                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1156 ECB             :                      errmsg("invalid value for parameter \"%s\": %d",
    1157                 :                             "n", n)));
    1158                 :     }
    1159 CBC          72 :     if (PG_NARGS() > 4)
    1160 ECB             :     {
    1161 GIC          54 :         endoption = PG_GETARG_INT32(4);
    1162              54 :         if (endoption != 0 && endoption != 1)
    1163               6 :             ereport(ERROR,
    1164                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1165 ECB             :                      errmsg("invalid value for parameter \"%s\": %d",
    1166                 :                             "endoption", endoption)));
    1167                 :     }
    1168 CBC          66 :     if (PG_NARGS() > 6)     /* argument 5 is flags, already fetched above */
    1169 ECB             :     {
    1170 GIC          42 :         subexpr = PG_GETARG_INT32(6);
    1171              42 :         if (subexpr < 0)
    1172               3 :             ereport(ERROR,
    1173                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1174 ECB             :                      errmsg("invalid value for parameter \"%s\": %d",
    1175                 :                             "subexpr", subexpr)));
    1176                 :     }
    1177                 : 
    1178                 :     /* Determine options */
    1179 GIC          63 :     parse_re_flags(&re_flags, flags);
    1180                 :     /* User mustn't specify 'g' */
    1181              63 :     if (re_flags.glob)
    1182               3 :         ereport(ERROR,
    1183 ECB             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1184                 :         /* translator: %s is a SQL function name */
    1185                 :                  errmsg("%s does not support the \"global\" option",
    1186                 :                         "regexp_instr()")));
    1187                 :     /* But we find all the matches anyway */
    1188 GIC          60 :     re_flags.glob = true;
    1189                 : 
    1190                 :     /* Do the matching; start is 1-based, the search offset passed on is 0-based */
    1191              60 :     matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
    1192                 :                                     PG_GET_COLLATION(),
    1193                 :                                     (subexpr > 0),   /* need submatches? */
    1194 ECB             :                                     false, false);
    1195                 : 
    1196                 :     /* When n exceeds matches return 0 (includes case of no matches) */
    1197 CBC          60 :     if (n > matchctx->nmatches)
    1198 GIC           6 :         PG_RETURN_INT32(0);
    1199                 : 
    1200                 :     /* When subexpr exceeds number of subexpressions return 0 */
    1201              54 :     if (subexpr > matchctx->npatterns)
    1202               6 :         PG_RETURN_INT32(0);
    1203 ECB             : 
    1204                 :     /* Select the appropriate match position to return */
    1205 GIC          48 :     pos = (n - 1) * matchctx->npatterns;    /* skip the entries of the first n-1 matches */
    1206 CBC          48 :     if (subexpr > 0)
    1207 GIC          27 :         pos += subexpr - 1;     /* then the entries of earlier subexpressions */
    1208              48 :     pos *= 2;                   /* two ints (start, end) per entry */
    1209              48 :     if (endoption == 1)
    1210              15 :         pos += 1;               /* pick the end offset instead of the start */
    1211                 : 
    1212 CBC          48 :     if (matchctx->match_locs[pos] >= 0)
    1213              45 :         PG_RETURN_INT32(matchctx->match_locs[pos] + 1);    /* 0-based => 1-based */
    1214                 :     else
    1215 GIC           3 :         PG_RETURN_INT32(0);     /* position not identifiable */
    1216 ECB             : }
    1217                 : 
    1218                 : /* This is separate to keep the opr_sanity regression test from complaining */
    1219                 : Datum
    1220 CBC           9 : regexp_instr_no_start(PG_FUNCTION_ARGS)
    1221 ECB             : {
    1222 CBC           9 :     return regexp_instr(fcinfo);    /* fcinfo handed on untouched; regexp_instr() tests PG_NARGS() itself */
    1223 ECB             : }
    1224                 : 
    1225                 : /* This is separate to keep the opr_sanity regression test from complaining */
    1226                 : Datum
    1227 CBC           3 : regexp_instr_no_n(PG_FUNCTION_ARGS)
    1228 ECB             : {
    1229 GIC           3 :     return regexp_instr(fcinfo);    /* fcinfo handed on untouched; regexp_instr() tests PG_NARGS() itself */
    1230 ECB             : }
    1231                 : 
    1232                 : /* This is separate to keep the opr_sanity regression test from complaining */
    1233                 : Datum
    1234 GIC          12 : regexp_instr_no_endoption(PG_FUNCTION_ARGS)
    1235 ECB             : {
    1236 GIC          12 :     return regexp_instr(fcinfo);    /* fcinfo handed on untouched; regexp_instr() tests PG_NARGS() itself */
    1237 ECB             : }
    1238                 : 
    1239                 : /* This is separate to keep the opr_sanity regression test from complaining */
    1240                 : Datum
    1241 GIC           6 : regexp_instr_no_flags(PG_FUNCTION_ARGS)
    1242 ECB             : {
    1243 GIC           6 :     return regexp_instr(fcinfo);    /* fcinfo handed on untouched; flags presumably absent here -- TODO confirm against the catalog entry */
    1244 ECB             : }
    1245                 : 
    1246                 : /* This is separate to keep the opr_sanity regression test from complaining */
    1247                 : Datum
    1248 GIC           6 : regexp_instr_no_subexpr(PG_FUNCTION_ARGS)
    1249 ECB             : {
    1250 GIC           6 :     return regexp_instr(fcinfo);    /* fcinfo handed on untouched; regexp_instr() reads subexpr only when PG_NARGS() > 6 */
    1251 ECB             : }
    1252                 : 
    1253                 : /*
    1254                 :  * regexp_like()
    1255                 :  *      Test for a pattern match within a string.
                         :  *
                         :  * Arguments: 0 = string, 1 = pattern, 2 = flags (optional).  The 'g' flag
                         :  * is rejected with an invalid-parameter-value error.  Otherwise the
                         :  * boolean result of RE_compile_and_execute() on the string's data, using
                         :  * the parsed cflags and the call's collation, is returned; no match
                         :  * locations are requested (0, NULL).
    1256                 :  */
    1257                 : Datum
    1258 CBC          15 : regexp_like(PG_FUNCTION_ARGS)
    1259                 : {
    1260 GIC          15 :     text       *str = PG_GETARG_TEXT_PP(0);
    1261              15 :     text       *pattern = PG_GETARG_TEXT_PP(1);
    1262              15 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
    1263 ECB             :     pg_re_flags re_flags;
    1264                 : 
    1265                 :     /* Determine options */
    1266 GIC          15 :     parse_re_flags(&re_flags, flags);
    1267                 :     /* User mustn't specify 'g' */
    1268              15 :     if (re_flags.glob)
    1269               3 :         ereport(ERROR,
    1270                 :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1271                 :         /* translator: %s is a SQL function name */
    1272                 :                  errmsg("%s does not support the \"global\" option",
    1273 ECB             :                         "regexp_like()")));
    1274                 : 
    1275                 :     /* Otherwise it's like textregexeq/texticregexeq */
    1276 CBC          12 :     PG_RETURN_BOOL(RE_compile_and_execute(pattern,
    1277 ECB             :                                           VARDATA_ANY(str),
    1278                 :                                           VARSIZE_ANY_EXHDR(str),
    1279                 :                                           re_flags.cflags,
    1280                 :                                           PG_GET_COLLATION(),
    1281                 :                                           0, NULL));
    1282                 : }
    1283                 : 
    1284                 : /* This is separate to keep the opr_sanity regression test from complaining */
    1285                 : Datum
    1286 GIC           3 : regexp_like_no_flags(PG_FUNCTION_ARGS)
    1287                 : {
    1288               3 :     return regexp_like(fcinfo);     /* fcinfo handed on untouched; flags presumably absent here -- TODO confirm against the catalog entry */
    1289                 : }
    1290                 : 
    1291 ECB             : /*
    1292                 :  * regexp_match()
    1293                 :  *      Return the first substring(s) matching a pattern within a string.
                         :  *
                         :  * Arguments: 0 = string, 1 = pattern, 2 = flags (optional).  The 'g' flag
                         :  * is rejected with an invalid-parameter-value error and a hint pointing
                         :  * at regexp_matches.  The search starts at offset 0 with subpattern
                         :  * collection enabled.
                         :  *
                         :  * Returns NULL when nothing matched; otherwise the value built by
                         :  * build_regexp_match_result() for the single match, as a Datum.
    1294                 :  */
    1295                 : Datum
    1296 GIC        1278 : regexp_match(PG_FUNCTION_ARGS)
    1297                 : {
    1298            1278 :     text       *orig_str = PG_GETARG_TEXT_PP(0);
    1299            1278 :     text       *pattern = PG_GETARG_TEXT_PP(1);
    1300            1278 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
    1301 ECB             :     pg_re_flags re_flags;
    1302                 :     regexp_matches_ctx *matchctx;
    1303                 : 
    1304                 :     /* Determine options */
    1305 GIC        1278 :     parse_re_flags(&re_flags, flags);
    1306                 :     /* User mustn't specify 'g' */
    1307            1278 :     if (re_flags.glob)
    1308               4 :         ereport(ERROR,
    1309                 :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1310                 :         /* translator: %s is a SQL function name */
    1311 ECB             :                  errmsg("%s does not support the \"global\" option",
    1312                 :                         "regexp_match()"),
    1313                 :                  errhint("Use the regexp_matches function instead.")));
    1314                 : 
    1315 CBC        1274 :     matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, 0,
    1316                 :                                     PG_GET_COLLATION(), true, false, false);
    1317                 : 
    1318 GIC        1274 :     if (matchctx->nmatches == 0)
    1319              85 :         PG_RETURN_NULL();
    1320 ECB             : 
    1321 GIC        1189 :     Assert(matchctx->nmatches == 1);    /* glob is off, so the search loop stops after one match */
    1322 ECB             : 
    1323                 :     /* Create workspace that build_regexp_match_result needs */
    1324 GIC        1189 :     matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
    1325            1189 :     matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
    1326                 : 
    1327            1189 :     PG_RETURN_DATUM(PointerGetDatum(build_regexp_match_result(matchctx)));
    1328                 : }
    1329                 : 
    1330 ECB             : /* This is separate to keep the opr_sanity regression test from complaining */
    1331                 : Datum
    1332 GIC        1263 : regexp_match_no_flags(PG_FUNCTION_ARGS)
    1333 ECB             : {
    1334 CBC        1263 :     return regexp_match(fcinfo);    /* fcinfo handed on untouched; flags presumably absent here -- TODO confirm against the catalog entry */
    1335                 : }
    1336 ECB             : 
    1337                 : /*
    1338                 :  * regexp_matches()
    1339                 :  *      Return a table of all matches of a pattern within a string.
                         :  *
                         :  * Set-returning function.  Arguments: 0 = string, 1 = pattern, 2 = flags
                         :  * (optional).  On the first call all matching is done at once by
                         :  * setup_regexp_matches(), inside the multi-call memory context, and the
                         :  * resulting regexp_matches_ctx is kept in funcctx->user_fctx.  Every call
                         :  * then returns the array built by build_regexp_match_result() for the
                         :  * next unreturned match, and the set ends once next_match reaches
                         :  * nmatches.  The 'g' flag is not rejected here.
    1340                 :  */
    1341                 : Datum
    1342 CBC         339 : regexp_matches(PG_FUNCTION_ARGS)
    1343                 : {
    1344                 :     FuncCallContext *funcctx;
    1345                 :     regexp_matches_ctx *matchctx;
    1346                 : 
    1347             339 :     if (SRF_IS_FIRSTCALL())
    1348                 :     {
    1349             144 :         text       *pattern = PG_GETARG_TEXT_PP(1);
    1350 GIC         144 :         text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
    1351                 :         pg_re_flags re_flags;
    1352                 :         MemoryContext oldcontext;
    1353                 : 
    1354             144 :         funcctx = SRF_FIRSTCALL_INIT();
    1355             144 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1356                 : 
    1357 ECB             :         /* Determine options */
    1358 GIC         144 :         parse_re_flags(&re_flags, flags);
    1359                 : 
    1360                 :         /* be sure to copy the input string into the multi-call ctx */
    1361             141 :         matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
    1362 ECB             :                                         &re_flags, 0,
    1363                 :                                         PG_GET_COLLATION(),
    1364                 :                                         true, false, false);
    1365                 : 
    1366                 :         /* Pre-create workspace that build_regexp_match_result needs */
    1367 GIC         135 :         matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
    1368             135 :         matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
    1369 ECB             : 
    1370 CBC         135 :         MemoryContextSwitchTo(oldcontext);
    1371 GIC         135 :         funcctx->user_fctx = (void *) matchctx;     /* saved for the later calls */
    1372                 :     }
    1373 ECB             : 
    1374 GIC         330 :     funcctx = SRF_PERCALL_SETUP();
    1375             330 :     matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
    1376 ECB             : 
    1377 GIC         330 :     if (matchctx->next_match < matchctx->nmatches)
    1378                 :     {
    1379                 :         ArrayType  *result_ary;
    1380                 : 
    1381             195 :         result_ary = build_regexp_match_result(matchctx);
    1382 CBC         195 :         matchctx->next_match++;
    1383             195 :         SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
    1384                 :     }
    1385 ECB             : 
    1386 CBC         135 :     SRF_RETURN_DONE(funcctx);   /* every match has been returned */
    1387                 : }
    1388                 : 
    1389 ECB             : /* This is separate to keep the opr_sanity regression test from complaining */
    1390                 : Datum
    1391 GIC         177 : regexp_matches_no_flags(PG_FUNCTION_ARGS)
    1392 ECB             : {
    1393 GIC         177 :     return regexp_matches(fcinfo);  /* fcinfo handed on untouched; flags presumably absent here -- TODO confirm against the catalog entry */
    1394                 : }
    1395                 : 
    1396 ECB             : /*
    1397                 :  * setup_regexp_matches --- do the initial matching for regexp_match,
    1398                 :  *      regexp_split, and related functions
    1399                 :  *
    1400                 :  * To avoid having to re-find the compiled pattern on each call, we do
    1401                 :  * all the matching in one swoop.  The returned regexp_matches_ctx contains
    1402                 :  * the locations of all the substrings matching the pattern.
    1403                 :  *
    1404                 :  * start_search: the character (not byte) offset in orig_str at which to
    1405                 :  * begin the search.  Returned positions are relative to orig_str anyway.
    1406                 :  * use_subpatterns: collect data about matches to parenthesized subexpressions.
    1407                 :  * ignore_degenerate: ignore zero-length matches.
    1408                 :  * fetching_unmatched: caller wants to fetch unmatched substrings.
    1409                 :  *
    1410                 :  * We don't currently assume that fetching_unmatched is exclusive of fetching
    1411                 :  * the matched text too; if it's set, the conversion buffer is large enough to
    1412                 :  * fetch any single matched or unmatched string, but not any larger
    1413                 :  * substring.  (In practice, when splitting the matches are usually small
    1414                 :  * anyway, and it didn't seem worth complicating the code further.)
    1415                 :  */
    1416                 : static regexp_matches_ctx *
    1417 GIC      101727 : setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
    1418                 :                      int start_search,
    1419                 :                      Oid collation,
    1420                 :                      bool use_subpatterns,
    1421                 :                      bool ignore_degenerate,
    1422                 :                      bool fetching_unmatched)
    1423                 : {
    1424          101727 :     regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
    1425          101727 :     int         eml = pg_database_encoding_max_length();
    1426                 :     int         orig_len;
    1427                 :     pg_wchar   *wide_str;
    1428                 :     int         wide_len;
    1429                 :     int         cflags;
    1430                 :     regex_t    *cpattern;
    1431                 :     regmatch_t *pmatch;
    1432 ECB             :     int         pmatch_len;
    1433                 :     int         array_len;
    1434                 :     int         array_idx;
    1435                 :     int         prev_match_end;
    1436                 :     int         prev_valid_match_end;
    1437 GIC      101727 :     int         maxlen = 0;     /* largest fetch length in characters */
    1438                 : 
    1439 ECB             :     /* save original string --- we'll extract result substrings from it */
    1440 CBC      101727 :     matchctx->orig_str = orig_str;
    1441                 : 
    1442                 :     /* convert string to pg_wchar form for matching */
    1443 GIC      101727 :     orig_len = VARSIZE_ANY_EXHDR(orig_str);
    1444          101727 :     wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
    1445          101727 :     wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
    1446                 : 
    1447                 :     /* set up the compiled pattern */
    1448          101727 :     cflags = re_flags->cflags;
    1449          101727 :     if (!use_subpatterns)
    1450          100261 :         cflags |= REG_NOSUB;
    1451          101727 :     cpattern = RE_compile_and_cache(pattern, cflags, collation);
    1452 ECB             : 
    1453                 :     /* do we want to remember subpatterns? */
    1454 GIC      101721 :     if (use_subpatterns && cpattern->re_nsub > 0)
    1455 ECB             :     {
    1456 GIC        1355 :         matchctx->npatterns = cpattern->re_nsub;
    1457            1355 :         pmatch_len = cpattern->re_nsub + 1;
    1458 ECB             :     }
    1459                 :     else
    1460                 :     {
    1461 GIC      100366 :         use_subpatterns = false;
    1462          100366 :         matchctx->npatterns = 1;
    1463 CBC      100366 :         pmatch_len = 1;
    1464 ECB             :     }
    1465                 : 
    1466                 :     /* temporary output space for RE package */
    1467 GIC      101721 :     pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
    1468                 : 
    1469 ECB             :     /*
    1470                 :      * the real output space (grown dynamically if needed)
    1471                 :      *
    1472                 :      * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
    1473                 :      * than at 2^27
    1474                 :      */
    1475 GIC      101721 :     array_len = re_flags->glob ? 255 : 31;
    1476 CBC      101721 :     matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
    1477          101721 :     array_idx = 0;
    1478 ECB             : 
    1479                 :     /* search for the pattern, perhaps repeatedly */
    1480 GIC      101721 :     prev_match_end = 0;
    1481          101721 :     prev_valid_match_end = 0;
    1482 CBC      547587 :     while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
    1483                 :                             pmatch_len, pmatch))
    1484                 :     {
    1485                 :         /*
    1486                 :          * If requested, ignore degenerate matches, which are zero-length
    1487                 :          * matches occurring at the start or end of a string or just after a
    1488                 :          * previous match.
    1489                 :          */
    1490          447173 :         if (!ignore_degenerate ||
    1491          445591 :             (pmatch[0].rm_so < wide_len &&
    1492          445570 :              pmatch[0].rm_eo > prev_match_end))
    1493                 :         {
    1494                 :             /* enlarge output space if needed */
    1495          447263 :             while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
    1496 ECB             :             {
    1497 CBC         180 :                 array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
    1498 GIC         180 :                 if (array_len > MaxAllocSize / sizeof(int))
    1499 UIC           0 :                     ereport(ERROR,
    1500                 :                             (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
    1501                 :                              errmsg("too many regular expression matches")));
    1502 GIC         180 :                 matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
    1503                 :                                                         sizeof(int) * array_len);
    1504                 :             }
    1505 ECB             : 
    1506                 :             /* save this match's locations */
    1507 CBC      447083 :             if (use_subpatterns)
    1508                 :             {
    1509                 :                 int         i;
    1510 ECB             : 
    1511 GIC        3936 :                 for (i = 1; i <= matchctx->npatterns; i++)
    1512 ECB             :                 {
    1513 CBC        2657 :                     int         so = pmatch[i].rm_so;
    1514 GBC        2657 :                     int         eo = pmatch[i].rm_eo;
    1515                 : 
    1516 GIC        2657 :                     matchctx->match_locs[array_idx++] = so;
    1517 CBC        2657 :                     matchctx->match_locs[array_idx++] = eo;
    1518 GIC        2657 :                     if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
    1519            1710 :                         maxlen = (eo - so);
    1520                 :                 }
    1521                 :             }
    1522 ECB             :             else
    1523                 :             {
    1524 GIC      445804 :                 int         so = pmatch[0].rm_so;
    1525          445804 :                 int         eo = pmatch[0].rm_eo;
    1526 ECB             : 
    1527 GIC      445804 :                 matchctx->match_locs[array_idx++] = so;
    1528 CBC      445804 :                 matchctx->match_locs[array_idx++] = eo;
    1529          445804 :                 if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
    1530 GIC      100283 :                     maxlen = (eo - so);
    1531 ECB             :             }
    1532 CBC      447083 :             matchctx->nmatches++;
    1533 ECB             : 
    1534                 :             /*
    1535                 :              * check length of unmatched portion between end of previous valid
    1536                 :              * (nondegenerate, or degenerate but not ignored) match and start
    1537                 :              * of current one
    1538                 :              */
    1539 CBC      447083 :             if (fetching_unmatched &&
    1540          445501 :                 pmatch[0].rm_so >= 0 &&
    1541 GIC      445501 :                 (pmatch[0].rm_so - prev_valid_match_end) > maxlen)
    1542 CBC      190379 :                 maxlen = (pmatch[0].rm_so - prev_valid_match_end);
    1543          447083 :             prev_valid_match_end = pmatch[0].rm_eo;
    1544 ECB             :         }
    1545 CBC      447173 :         prev_match_end = pmatch[0].rm_eo;
    1546                 : 
    1547 ECB             :         /* if not glob, stop after one match */
    1548 GIC      447173 :         if (!re_flags->glob)
    1549            1274 :             break;
    1550                 : 
    1551                 :         /*
    1552                 :          * Advance search position.  Normally we start the next search at the
    1553                 :          * end of the previous match; but if the match was of zero length, we
    1554 ECB             :          * have to advance by one character, or we'd just find the same match
    1555                 :          * again.
    1556                 :          */
    1557 CBC      445899 :         start_search = prev_match_end;
    1558          445899 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
    1559 GIC         588 :             start_search++;
    1560 CBC      445899 :         if (start_search > wide_len)
    1561 GIC          33 :             break;
    1562                 :     }
    1563 ECB             : 
    1564                 :     /*
    1565                 :      * check length of unmatched portion between end of last match and end of
    1566                 :      * input string
    1567                 :      */
    1568 GIC      101721 :     if (fetching_unmatched &&
    1569          100192 :         (wide_len - prev_valid_match_end) > maxlen)
    1570              14 :         maxlen = (wide_len - prev_valid_match_end);
    1571                 : 
    1572 ECB             :     /*
    1573                 :      * Keep a note of the end position of the string for the benefit of
    1574                 :      * splitting code.
    1575                 :      */
    1576 CBC      101721 :     matchctx->match_locs[array_idx] = wide_len;
    1577                 : 
    1578 GIC      101721 :     if (eml > 1)
    1579                 :     {
    1580          101721 :         int64       maxsiz = eml * (int64) maxlen;
    1581                 :         int         conv_bufsiz;
    1582                 : 
    1583 ECB             :         /*
    1584                 :          * Make the conversion buffer large enough for any substring of
    1585                 :          * interest.
    1586                 :          *
    1587                 :          * Worst case: assume we need the maximum size (maxlen*eml), but take
    1588                 :          * advantage of the fact that the original string length in bytes is
    1589                 :          * an upper bound on the byte length of any fetched substring (and we
    1590                 :          * know that len+1 is safe to allocate because the varlena header is
    1591                 :          * longer than 1 byte).
    1592                 :          */
    1593 CBC      101721 :         if (maxsiz > orig_len)
    1594 GIC      100240 :             conv_bufsiz = orig_len + 1;
    1595 ECB             :         else
    1596 GIC        1481 :             conv_bufsiz = maxsiz + 1;   /* safe since maxsiz < 2^30 */
    1597                 : 
    1598          101721 :         matchctx->conv_buf = palloc(conv_bufsiz);
    1599          101721 :         matchctx->conv_bufsiz = conv_bufsiz;
    1600          101721 :         matchctx->wide_str = wide_str;
    1601                 :     }
    1602                 :     else
    1603                 :     {
    1604                 :         /* No need to keep the wide string if we're in a single-byte charset. */
    1605 UIC           0 :         pfree(wide_str);
    1606               0 :         matchctx->wide_str = NULL;
    1607               0 :         matchctx->conv_buf = NULL;
    1608 LBC           0 :         matchctx->conv_bufsiz = 0;
    1609 ECB             :     }
    1610                 : 
    1611                 :     /* Clean up temp storage */
    1612 GIC      101721 :     pfree(pmatch);
    1613 ECB             : 
    1614 CBC      101721 :     return matchctx;
    1615 ECB             : }
    1616                 : 
    1617                 : /*
    1618                 :  * build_regexp_match_result - build output array for current match
                            *
                            * Reads matchctx->npatterns (start, end) offset pairs out of
                            * matchctx->match_locs, beginning at slot
                            * next_match * npatterns * 2.  A pair with a negative start or end
                            * becomes a NULL array element.  Otherwise the element is a text value
                            * holding that slice: converted from matchctx->wide_str through
                            * matchctx->conv_buf when a conversion buffer exists, or taken from
                            * matchctx->orig_str via text_substr when it does not.
                            *
                            * Returns a one-dimensional array of TEXTOID elements with lower bound 1
                            * and npatterns entries.  matchctx->elems and matchctx->nulls are used as
                            * scratch space and are overwritten.
    1619                 :  */
    1620 EUB             : static ArrayType *
    1621 GBC        1384 : build_regexp_match_result(regexp_matches_ctx *matchctx)
    1622 EUB             : {
    1623 GBC        1384 :     char       *buf = matchctx->conv_buf;
    1624 GIC        1384 :     Datum      *elems = matchctx->elems;
    1625            1384 :     bool       *nulls = matchctx->nulls;
    1626                 :     int         dims[1];
    1627 ECB             :     int         lbs[1];
    1628                 :     int         loc;
    1629                 :     int         i;
    1630                 : 
    1631                 :     /* Extract matching substrings from the original string */
    1632 GIC        1384 :     loc = matchctx->next_match * matchctx->npatterns * 2;
    1633            4011 :     for (i = 0; i < matchctx->npatterns; i++)
    1634                 :     {
                                   /* two consecutive slots per pattern: start offset, then end offset */
    1635            2627 :         int         so = matchctx->match_locs[loc++];
    1636 CBC        2627 :         int         eo = matchctx->match_locs[loc++];
    1637                 : 
                                   /* a negative offset marks a pattern with no usable location */
    1638            2627 :         if (so < 0 || eo < 0)
    1639 ECB             :         {
    1640 CBC           3 :             elems[i] = (Datum) 0;
    1641 GIC           3 :             nulls[i] = true;
    1642                 :         }
    1643            2624 :         else if (buf)
    1644                 :         {
                                       /* convert eo - so wide characters into the shared buffer */
    1645            2624 :             int         len = pg_wchar2mb_with_len(matchctx->wide_str + so,
    1646                 :                                                    buf,
    1647 ECB             :                                                    eo - so);
    1648                 : 
    1649 GIC        2624 :             Assert(len < matchctx->conv_bufsiz);
    1650 CBC        2624 :             elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
    1651            2624 :             nulls[i] = false;
    1652                 :         }
    1653 ECB             :         else
    1654                 :         {
                                       /*
                                        * No conversion buffer: slice orig_str directly.  text_substr is
                                        * given so + 1 as its start and eo - so as its length
                                        * (presumably a 1-based start position -- confirm).
                                        */
    1655 LBC           0 :             elems[i] = DirectFunctionCall3(text_substr,
    1656 ECB             :                                            PointerGetDatum(matchctx->orig_str),
    1657                 :                                            Int32GetDatum(so + 1),
    1658                 :                                            Int32GetDatum(eo - so));
    1659 UIC           0 :             nulls[i] = false;
    1660 ECB             :         }
    1661                 :     }
    1662                 : 
    1663                 :     /* And form an array */
    1664 CBC        1384 :     dims[0] = matchctx->npatterns;
    1665            1384 :     lbs[0] = 1;
    1666 ECB             :     /* XXX: this hardcodes assumptions about the text type */
    1667 GIC        1384 :     return construct_md_array(elems, nulls, 1, dims, lbs,
    1668                 :                               TEXTOID, -1, false, TYPALIGN_INT);
    1669                 : }
    1670 EUB             : 
    1671                 : /*
    1672                 :  * regexp_split_to_table()
    1673                 :  *      Split the string at matches of the pattern, returning the
    1674                 :  *      split-out substrings as a table.
                            *
                            *      Arguments: 0 = input string, 1 = pattern, 2 = flags (optional).
                            *
                            *      On the first call the flags are parsed, a user-supplied 'g' flag
                            *      is rejected with an error, and all matches are collected by
                            *      setup_regexp_matches() while the multi-call memory context is
                            *      current; the resulting context is saved in funcctx->user_fctx.
                            *      Every call then returns one piece built by
                            *      build_regexp_split_result() and advances next_match, finishing
                            *      once next_match exceeds nmatches.
    1675                 :  */
    1676                 : Datum
    1677 GIC         311 : regexp_split_to_table(PG_FUNCTION_ARGS)
    1678                 : {
    1679 ECB             :     FuncCallContext *funcctx;
    1680                 :     regexp_matches_ctx *splitctx;
    1681                 : 
    1682 CBC         311 :     if (SRF_IS_FIRSTCALL())
    1683                 :     {
    1684 GIC          26 :         text       *pattern = PG_GETARG_TEXT_PP(1);
    1685              26 :         text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
    1686                 :         pg_re_flags re_flags;
    1687                 :         MemoryContext oldcontext;
    1688                 : 
    1689              26 :         funcctx = SRF_FIRSTCALL_INIT();
                                   /* everything allocated until the switch back must outlive this call */
    1690              26 :         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
    1691                 : 
    1692 ECB             :         /* Determine options */
    1693 GIC          26 :         parse_re_flags(&re_flags, flags);
    1694                 :         /* User mustn't specify 'g' */
    1695              23 :         if (re_flags.glob)
    1696               3 :             ereport(ERROR,
    1697 ECB             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1698                 :             /* translator: %s is a SQL function name */
    1699                 :                      errmsg("%s does not support the \"global\" option",
    1700                 :                             "regexp_split_to_table()")));
    1701                 :         /* But we find all the matches anyway */
    1702 GIC          20 :         re_flags.glob = true;
    1703                 : 
    1704 ECB             :         /* be sure to copy the input string into the multi-call ctx */
    1705 CBC          20 :         splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
    1706                 :                                         &re_flags, 0,
    1707                 :                                         PG_GET_COLLATION(),
    1708 ECB             :                                         false, true, true);
    1709                 : 
    1710 CBC          20 :         MemoryContextSwitchTo(oldcontext);
    1711              20 :         funcctx->user_fctx = (void *) splitctx;
    1712                 :     }
    1713                 : 
    1714 GIC         305 :     funcctx = SRF_PERCALL_SETUP();
    1715             305 :     splitctx = (regexp_matches_ctx *) funcctx->user_fctx;
    1716                 : 
                               /*
                                * "<=" rather than "<": the piece for next_match == nmatches is the text
                                * after the last match, so one more piece than matches is emitted.
                                */
    1717 CBC         305 :     if (splitctx->next_match <= splitctx->nmatches)
    1718                 :     {
    1719 GIC         285 :         Datum       result = build_regexp_split_result(splitctx);
    1720 ECB             : 
    1721 GIC         285 :         splitctx->next_match++;
    1722             285 :         SRF_RETURN_NEXT(funcctx, result);
    1723                 :     }
    1724                 : 
    1725 CBC          20 :     SRF_RETURN_DONE(funcctx);
    1726 ECB             : }
    1727                 : 
    1728                 : /* This is separate to keep the opr_sanity regression test from complaining */
                           /* Forwards fcinfo unchanged to regexp_split_to_table(). */
    1729                 : Datum
    1730 CBC         276 : regexp_split_to_table_no_flags(PG_FUNCTION_ARGS)
    1731                 : {
    1732             276 :     return regexp_split_to_table(fcinfo);
    1733                 : }
    1734 ECB             : 
    1735                 : /*
    1736                 :  * regexp_split_to_array()
    1737                 :  *      Split the string at matches of the pattern, returning the
    1738                 :  *      split-out substrings as an array.
                            *
                            *      Arguments: 0 = input string, 1 = pattern, 2 = flags (optional).
                            *
                            *      A user-supplied 'g' flag is rejected with an error, but matching
                            *      is still done globally.  One piece is built by
                            *      build_regexp_split_result() for every value of next_match up to
                            *      and including nmatches, and the pieces are accumulated into an
                            *      array of TEXTOID elements in CurrentMemoryContext.
    1739                 :  */
    1740                 : Datum
    1741 GIC      100178 : regexp_split_to_array(PG_FUNCTION_ARGS)
    1742                 : {
    1743          100178 :     ArrayBuildState *astate = NULL;
    1744                 :     pg_re_flags re_flags;
    1745 ECB             :     regexp_matches_ctx *splitctx;
    1746                 : 
    1747                 :     /* Determine options */
    1748 GIC      100178 :     parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2));
    1749                 :     /* User mustn't specify 'g' */
    1750          100175 :     if (re_flags.glob)
    1751               3 :         ereport(ERROR,
    1752                 :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1753                 :         /* translator: %s is a SQL function name */
    1754                 :                  errmsg("%s does not support the \"global\" option",
    1755                 :                         "regexp_split_to_array()")));
    1756 ECB             :     /* But we find all the matches anyway */
    1757 GIC      100172 :     re_flags.glob = true;
    1758 ECB             : 
    1759 GIC      100172 :     splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
    1760          100172 :                                     PG_GETARG_TEXT_PP(1),
    1761                 :                                     &re_flags, 0,
    1762                 :                                     PG_GET_COLLATION(),
    1763 ECB             :                                     false, true, true);
    1764                 : 
                               /*
                                * "<=" rather than "<": the final iteration yields the text after the
                                * last match.
                                */
    1765 CBC      645580 :     while (splitctx->next_match <= splitctx->nmatches)
    1766 ECB             :     {
    1767 GIC      545408 :         astate = accumArrayResult(astate,
    1768                 :                                   build_regexp_split_result(splitctx),
    1769                 :                                   false,
    1770                 :                                   TEXTOID,
    1771                 :                                   CurrentMemoryContext);
    1772 CBC      545408 :         splitctx->next_match++;
    1773                 :     }
    1774 ECB             : 
    1775 GNC      100172 :     PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
    1776                 : }
    1777                 : 
    1778                 : /* This is separate to keep the opr_sanity regression test from complaining */
                           /* Forwards fcinfo unchanged to regexp_split_to_array(). */
    1779                 : Datum
    1780 CBC      100157 : regexp_split_to_array_no_flags(PG_FUNCTION_ARGS)
    1781                 : {
    1782          100157 :     return regexp_split_to_array(fcinfo);
    1783                 : }
    1784                 : 
    1785                 : /*
    1786                 :  * build_regexp_split_result - build output string for current match
    1787 ECB             :  *
    1788                 :  * We return the string between the current match and the previous one,
    1789                 :  * or the string after the last match when next_match == nmatches.
                            *
                            * The piece runs from startpos to endpos.  startpos is 0 for the first
                            * piece, otherwise match_locs[next_match * 2 - 1]; endpos is
                            * match_locs[next_match * 2].  A negative startpos, or an endpos smaller
                            * than startpos, is reported with elog(ERROR).
                            *
                            * The result is a text Datum: converted from splitctx->wide_str through
                            * splitctx->conv_buf when a conversion buffer exists, otherwise sliced
                            * from splitctx->orig_str via text_substr.
    1790                 :  */
    1791                 : static Datum
    1792 GIC      545693 : build_regexp_split_result(regexp_matches_ctx *splitctx)
    1793                 : {
    1794          545693 :     char       *buf = splitctx->conv_buf;
    1795 ECB             :     int         startpos;
    1796                 :     int         endpos;
    1797                 : 
                               /* the piece starts where the previous match ended, or at 0 if none */
    1798 GIC      545693 :     if (splitctx->next_match > 0)
    1799          445501 :         startpos = splitctx->match_locs[splitctx->next_match * 2 - 1];
    1800                 :     else
    1801          100192 :         startpos = 0;
    1802          545693 :     if (startpos < 0)
    1803 UIC           0 :         elog(ERROR, "invalid match ending position");
    1804                 : 
                               /* the piece ends at the slot following the previous match's end */
    1805 GIC      545693 :     endpos = splitctx->match_locs[splitctx->next_match * 2];
    1806          545693 :     if (endpos < startpos)
    1807 LBC           0 :         elog(ERROR, "invalid match starting position");
    1808                 : 
    1809 CBC      545693 :     if (buf)
    1810                 :     {
    1811                 :         int         len;
    1812                 : 
    1813          545693 :         len = pg_wchar2mb_with_len(splitctx->wide_str + startpos,
    1814 ECB             :                                    buf,
    1815                 :                                    endpos - startpos);
    1816 CBC      545693 :         Assert(len < splitctx->conv_bufsiz);
    1817          545693 :         return PointerGetDatum(cstring_to_text_with_len(buf, len));
    1818 EUB             :     }
    1819                 :     else
    1820 ECB             :     {
                                   /*
                                    * No conversion buffer: text_substr is given startpos + 1 as its
                                    * start and endpos - startpos as its length (presumably a 1-based
                                    * start position -- confirm).
                                    */
    1821 LBC           0 :         return DirectFunctionCall3(text_substr,
    1822 EUB             :                                    PointerGetDatum(splitctx->orig_str),
    1823                 :                                    Int32GetDatum(startpos + 1),
    1824 ECB             :                                    Int32GetDatum(endpos - startpos));
    1825                 :     }
    1826                 : }
    1827                 : 
    1828                 : /*
    1829                 :  * regexp_substr()
    1830                 :  *      Return the substring that matches a regular expression pattern
                            *
                            *      Arguments: 0 = string, 1 = pattern, 2 = start (optional, default
                            *      1, must be > 0), 3 = n, the occurrence of the match to return
                            *      (optional, default 1, must be > 0), 4 = flags (optional),
                            *      5 = subexpr (optional, default 0, must be >= 0).  An out-of-range
                            *      start, n or subexpr, or a 'g' in flags, raises
                            *      ERRCODE_INVALID_PARAMETER_VALUE.
                            *
                            *      Returns NULL when n exceeds the number of matches, when subexpr
                            *      exceeds npatterns, or when the selected location is negative;
                            *      otherwise returns the selected slice of the original string,
                            *      taken with text_substr.
    1831                 :  */
    1832                 : Datum
    1833 GIC          54 : regexp_substr(PG_FUNCTION_ARGS)
    1834                 : {
    1835              54 :     text       *str = PG_GETARG_TEXT_PP(0);
    1836 GBC          54 :     text       *pattern = PG_GETARG_TEXT_PP(1);
    1837 GIC          54 :     int         start = 1;
    1838              54 :     int         n = 1;
    1839              54 :     text       *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4);
    1840              54 :     int         subexpr = 0;
    1841                 :     int         so,
    1842                 :                 eo,
    1843                 :                 pos;
    1844                 :     pg_re_flags re_flags;
    1845                 :     regexp_matches_ctx *matchctx;
    1846                 : 
    1847                 :     /* Collect optional parameters */
    1848 CBC          54 :     if (PG_NARGS() > 2)
    1849                 :     {
    1850              45 :         start = PG_GETARG_INT32(2);
    1851              45 :         if (start <= 0)
    1852               3 :             ereport(ERROR,
    1853 ECB             :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1854                 :                      errmsg("invalid value for parameter \"%s\": %d",
    1855                 :                             "start", start)));
    1856                 :     }
    1857 GIC          51 :     if (PG_NARGS() > 3)
    1858                 :     {
    1859              39 :         n = PG_GETARG_INT32(3);
    1860              39 :         if (n <= 0)
    1861               3 :             ereport(ERROR,
    1862                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1863 ECB             :                      errmsg("invalid value for parameter \"%s\": %d",
    1864                 :                             "n", n)));
    1865                 :     }
                               /* argument 4 (flags) was already fetched above; 5 is subexpr */
    1866 CBC          48 :     if (PG_NARGS() > 5)
    1867 ECB             :     {
    1868 GIC          24 :         subexpr = PG_GETARG_INT32(5);
    1869              24 :         if (subexpr < 0)
    1870               3 :             ereport(ERROR,
    1871                 :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1872 ECB             :                      errmsg("invalid value for parameter \"%s\": %d",
    1873                 :                             "subexpr", subexpr)));
    1874                 :     }
    1875                 : 
    1876                 :     /* Determine options */
    1877 GIC          45 :     parse_re_flags(&re_flags, flags);
    1878                 :     /* User mustn't specify 'g' */
    1879              45 :     if (re_flags.glob)
    1880               3 :         ereport(ERROR,
    1881 ECB             :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
    1882                 :         /* translator: %s is a SQL function name */
    1883                 :                  errmsg("%s does not support the \"global\" option",
    1884                 :                         "regexp_substr()")));
    1885                 :     /* But we find all the matches anyway */
    1886 GIC          42 :     re_flags.glob = true;
    1887                 : 
    1888                 :     /* Do the matching */
                               /*
                                * start - 1 is handed over as the fourth argument (presumably a 0-based
                                * search start offset -- confirm against setup_regexp_matches()).
                                */
    1889              42 :     matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
    1890                 :                                     PG_GET_COLLATION(),
    1891                 :                                     (subexpr > 0),   /* need submatches? */
    1892 ECB             :                                     false, false);
    1893                 : 
    1894                 :     /* When n exceeds matches return NULL (includes case of no matches) */
    1895 CBC          42 :     if (n > matchctx->nmatches)
    1896 GIC           6 :         PG_RETURN_NULL();
    1897                 : 
    1898                 :     /* When subexpr exceeds number of subexpressions return NULL */
    1899              36 :     if (subexpr > matchctx->npatterns)
    1900               3 :         PG_RETURN_NULL();
    1901 ECB             : 
    1902                 :     /* Select the appropriate match position to return */
                               /*
                                * Skip the (n - 1) earlier matches of npatterns pairs each, then step to
                                * pair subexpr - 1 within the match when a subexpression was requested;
                                * finally double the pair index because each pair fills two slots.
                                */
    1903 GIC          33 :     pos = (n - 1) * matchctx->npatterns;
    1904 CBC          33 :     if (subexpr > 0)
    1905 GIC          15 :         pos += subexpr - 1;
    1906              33 :     pos *= 2;
    1907              33 :     so = matchctx->match_locs[pos];
    1908              33 :     eo = matchctx->match_locs[pos + 1];
    1909                 : 
    1910 CBC          33 :     if (so < 0 || eo < 0)
    1911               3 :         PG_RETURN_NULL();       /* unidentifiable location */
    1912                 : 
    1913 GIC          30 :     PG_RETURN_DATUM(DirectFunctionCall3(text_substr,
    1914 ECB             :                                         PointerGetDatum(matchctx->orig_str),
    1915                 :                                         Int32GetDatum(so + 1),
    1916                 :                                         Int32GetDatum(eo - so)));
    1917                 : }
    1918                 : 
    1919                 : /* This is separate to keep the opr_sanity regression test from complaining */
                           /* Forwards fcinfo unchanged to regexp_substr(). */
    1920                 : Datum
    1921 CBC           9 : regexp_substr_no_start(PG_FUNCTION_ARGS)
    1922 ECB             : {
    1923 CBC           9 :     return regexp_substr(fcinfo);
    1924                 : }
    1925 ECB             : 
    1926                 : /* This is separate to keep the opr_sanity regression test from complaining */
                           /* Forwards fcinfo unchanged to regexp_substr(). */
    1927                 : Datum
    1928 CBC           3 : regexp_substr_no_n(PG_FUNCTION_ARGS)
    1929                 : {
    1930 GIC           3 :     return regexp_substr(fcinfo);
    1931                 : }
    1932                 : 
    1933                 : /* This is separate to keep the opr_sanity regression test from complaining */
                           /* Forwards fcinfo unchanged to regexp_substr(). */
    1934                 : Datum
    1935              12 : regexp_substr_no_flags(PG_FUNCTION_ARGS)
    1936 ECB             : {
    1937 GIC          12 :     return regexp_substr(fcinfo);
    1938 ECB             : }
    1939                 : 
    1940                 : /* This is separate to keep the opr_sanity regression test from complaining */
                           /* Forwards fcinfo unchanged to regexp_substr(). */
    1941                 : Datum
    1942 GIC           6 : regexp_substr_no_subexpr(PG_FUNCTION_ARGS)
    1943 ECB             : {
    1944 GIC           6 :     return regexp_substr(fcinfo);
    1945 ECB             : }
    1946                 : 
    1947                 : /*
    1948                 :  * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
    1949                 :  *
    1950                 :  * The result is NULL if there is no fixed prefix, else a palloc'd string.
    1951                 :  * If it is an exact match, not just a prefix, *exact is returned as true.
                            *
                            * The RE is compiled with REG_ADVANCED | REG_NOSUB (plus REG_ICASE when
                            * case_insensitive is true) under the given collation.  *exact is always
                            * set: false by default, true only for a REG_EXACT result.  Any
                            * pg_regprefix() result other than REG_NOMATCH, REG_PREFIX or REG_EXACT
                            * is reported as an ERROR with ERRCODE_INVALID_REGULAR_EXPRESSION.
    1952                 :  */
    1953                 : char *
    1954 GIC        6461 : regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
    1955                 :                     bool *exact)
    1956                 : {
    1957 ECB             :     char       *result;
    1958                 :     regex_t    *re;
    1959                 :     int         cflags;
    1960                 :     int         re_result;
    1961                 :     pg_wchar   *str;
    1962                 :     size_t      slen;
    1963                 :     size_t      maxlen;
    1964                 :     char        errMsg[100];
    1965                 : 
    1966 GIC        6461 :     *exact = false;             /* default result */
    1967                 : 
    1968                 :     /* Compile RE */
    1969 CBC        6461 :     cflags = REG_ADVANCED;
    1970 GIC        6461 :     if (case_insensitive)
    1971              46 :         cflags |= REG_ICASE;
    1972                 : 
    1973            6461 :     re = RE_compile_and_cache(text_re, cflags | REG_NOSUB, collation);
    1974                 : 
    1975                 :     /* Examine it to see if there's a fixed prefix */
    1976            6449 :     re_result = pg_regprefix(re, &str, &slen);
    1977                 : 
    1978            6449 :     switch (re_result)
    1979                 :     {
    1980             364 :         case REG_NOMATCH:
    1981 CBC         364 :             return NULL;
    1982                 : 
    1983 GIC         626 :         case REG_PREFIX:
    1984 ECB             :             /* continue with wchar conversion */
    1985 CBC         626 :             break;
    1986 ECB             : 
    1987 GIC        5459 :         case REG_EXACT:
    1988 CBC        5459 :             *exact = true;
    1989                 :             /* continue with wchar conversion */
    1990 GIC        5459 :             break;
    1991 ECB             : 
    1992 UIC           0 :         default:
    1993 ECB             :             /* re failed??? */
    1994 LBC           0 :             pg_regerror(re_result, re, errMsg, sizeof(errMsg));
    1995               0 :             ereport(ERROR,
    1996                 :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
    1997 ECB             :                      errmsg("regular expression failed: %s", errMsg)));
    1998                 :             break;
    1999                 :     }
    2000                 : 
    2001                 :     /* Convert pg_wchar result back to database encoding */
                               /*
                                * Size for the worst case: every wide character expanding to the
                                * encoding's maximum byte length, plus one more byte (presumably room
                                * for a terminating NUL -- confirm against pg_wchar2mb_with_len()).
                                */
    2002 CBC        6085 :     maxlen = pg_database_encoding_max_length() * slen + 1;
    2003 GIC        6085 :     result = (char *) palloc(maxlen);
                               /* slen is reused: wide-character count going in, byte count coming out */
    2004 CBC        6085 :     slen = pg_wchar2mb_with_len(str, result, slen);
    2005 GIC        6085 :     Assert(slen < maxlen);
    2006 EUB             : 
    2007 GNC        6085 :     pfree(str);
    2008 EUB             : 
    2009 GBC        6085 :     return result;
    2010                 : }

Generated by: LCOV version v1.16-55-g56c0a2a